Skip to content

Instantly share code, notes, and snippets.

@thenonameguy
Created August 26, 2012 21:00
Show Gist options
  • Save thenonameguy/3483567 to your computer and use it in GitHub Desktop.
Save thenonameguy/3483567 to your computer and use it in GitHub Desktop.
Download a 4chan thread to a directory
import os
import sys
import urllib
import urllib2
import re
import time
# Matches the part of a full-size image URL that follows the "images." host
# prefix, as it appears in the thread HTML (e.g. "4chan.org/b/src/123.jpg").
exp_imgurl = re.compile(r'4chan\.org/\w+/src/\d+\.(?:jpg|gif|png|jpeg)')
# Matches just the numeric file name (digits plus extension) of an image.
exp_picname = re.compile(r'\d+\.(?:jpg|gif|png|jpeg)')
# Sent as the User-agent header of the thread request.
ua = "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.1.4) Gecko/20091007 Firefox/3.5.4"
head = {'User-agent': ua}


def fetch_thread(threadurl):
    """Return the raw HTML of *threadurl*.

    The request is retried exactly once if the first attempt fails with a
    ``urllib2.URLError`` (``urllib2.HTTPError`` is a subclass of it, so both
    are covered). A failure of the retry propagates to the caller.
    """
    req = urllib2.Request(threadurl, None, head)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError:
        print("Request failed")
        response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()


def find_image_urls(html):
    """Return the distinct image URL fragments found in *html*.

    Each fragment is a match of ``exp_imgurl``. Duplicates are dropped and
    the order of first appearance in *html* is kept, so downloads happen in
    a deterministic order.
    """
    seen = set()
    unique = []
    for fragment in exp_imgurl.findall(html):
        if fragment not in seen:
            seen.add(fragment)
            unique.append(fragment)
    return unique


def ensure_folder(subfolder):
    """Create *subfolder* (including parents) unless it already exists."""
    if not os.path.exists(subfolder):
        print("Folder %s does not exist. Creating..." % subfolder)
        os.makedirs(subfolder)
    else:
        print("Folder %s exists. I will just put all files in there." % subfolder)


def download_images(imgurls, subfolder):
    """Download every image in *imgurls* into *subfolder*.

    *imgurls* holds URL fragments as returned by ``find_image_urls``. Files
    that already exist in *subfolder* are skipped. A download that fails
    with ``urllib.ContentTooShortError`` is retried once; if the retry is
    truncated as well, the partial file is removed and the error propagates.
    """
    totalnumber = len(imgurls)
    for i, img in enumerate(imgurls):
        source = "http://images." + str(img)
        filename = exp_picname.search(source).group()
        destination = os.path.join(subfolder, filename)
        if os.path.isfile(destination):
            print("File %s exists. Skipping..." % str(filename))
            continue
        print("Downloading %d/%d: %s" % (i + 1, totalnumber, source))
        try:
            urllib.urlretrieve(source, destination)
        except urllib.ContentTooShortError:
            print("Image download failed, retrying...")
            time.sleep(1)
            try:
                urllib.urlretrieve(source, destination)
            except urllib.ContentTooShortError:
                # Do not leave a truncated file behind: the next run would
                # treat it as already downloaded and skip it.
                if os.path.isfile(destination):
                    os.remove(destination)
                raise
            time.sleep(0.5)  # pause between downloads (presumably throttling)
        else:
            time.sleep(0.25)  # pause between downloads (presumably throttling)


def main(argv=None):
    """Download all images of a thread: ``4chan.py <url> <folder>``.

    *argv* defaults to ``sys.argv``. Exits with status 1 when the URL or the
    target folder argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("Missing parameters.")
        print("Usage: python 4chan.py <url> <folder>")
        sys.exit(1)
    threadurl = argv[1]
    subfolder = argv[2]

    print("Thread %s going to folder %s" % (threadurl, subfolder))
    print("Fetching html...")
    msg = fetch_thread(threadurl)
    print("Received %d bytes" % len(msg))

    # Report the number of distinct images so it agrees with the "i/total"
    # progress printed while downloading.
    imgurls = find_image_urls(msg)
    print("Found %d images" % len(imgurls))

    ensure_folder(subfolder)
    download_images(imgurls, subfolder)

    print("Aaaaaaand we are done. See you next time.")
    print("by thenonameguy <3")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment