thenonameguy/4chandown.py

## 4chandown.py
import os
import sys
import urllib
import urllib2
import re
import time

# Command-line entry: download every full-size image linked from a 4chan
# thread page into a local folder.
#
# Usage: python 4chandown.py <url> <folder>
#   <url>    - address of the thread page to scan for image links
#   <folder> - directory the images are saved into (created when missing)

# Both positional arguments are required.
if len(sys.argv) < 3:
    print("Missing parameters.")
    print("Usage:    python 4chandown.py <url> <folder>")
    # Non-zero status lets a calling shell/script detect the usage error.
    sys.exit(1)

threadurl = sys.argv[1]
subfolder = sys.argv[2]

# Raw strings keep the regex escapes (\. \w \d) away from Python's own
# string-escape handling.
# Host/path part of a full-size image link: 4chan.org/<board>/src/<digits>.<ext>
exp_imgurl = re.compile(r'4chan\.org/\w+/src/\d+\.(?:jpg|gif|png|jpeg)')
# Just the "<digits>.<ext>" file name inside such a link.
exp_picname = re.compile(r'\d+\.(?:jpg|gif|png|jpeg)')

# The request is sent with a browser-like User-agent header.
ua = "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.1.4) Gecko/20091007 Firefox/3.5.4"
head = {'User-agent': ua}

print("Thread %s going to folder %s" % (threadurl, subfolder))

print("Fetching html...")

req = urllib2.Request(threadurl, None, head)

# Fetch the thread page, retrying exactly once. urllib2.HTTPError is a
# subclass of urllib2.URLError, so a single handler covers both. A second
# failure is re-raised so the script stops with the real error.
response = None
for attempt in (1, 2):
    try:
        response = urllib2.urlopen(req)
        break
    except urllib2.URLError:
        if attempt == 2:
            raise
        print("Request failed")

# Always release the connection, even when reading fails.
try:
    msg = response.read()
finally:
    response.close()

print("Received %d bytes" % len(msg))

imgurls = exp_imgurl.findall(msg)

# This count includes repeated links; duplicates are removed further down.
print("Found %d images" % len(imgurls))

if not os.path.exists(subfolder):
    print("Folder %s does not exist. Creating..." % subfolder)
    os.makedirs(subfolder)
else:
    print("Folder %s exists. I will just put all files in there." % subfolder)

# De-duplicate once (the same link can match several times in the page) and
# sort so the download order is the same on every run.
unique_imgurls = sorted(set(imgurls))
totalnumber = len(unique_imgurls)

for i, img in enumerate(unique_imgurls):
    source = "http://images." + img
    filename = exp_picname.search(source).group(0)
    destination = os.path.join(subfolder, filename)

    # Files already on disk are never downloaded again.
    if os.path.isfile(destination):
        print("File %s exists. Skipping..." % filename)
        continue

    print("Downloading %d/%d: %s" % (i + 1, totalnumber, source))
    try:
        urllib.urlretrieve(source, destination)
        # Short pause between downloads; presumably to avoid hammering the
        # server (the original author left this unexplained).
        time.sleep(0.25)
    except urllib.ContentTooShortError:
        print("Image download failed, retrying...")
        time.sleep(1)
        try:
            urllib.urlretrieve(source, destination)
            time.sleep(0.5)
        except urllib.ContentTooShortError:
            # urlretrieve has already written the incomplete data to
            # `destination`. Remove it, otherwise the "file exists" check
            # would skip this image forever on later runs.
            print("Image download failed again, skipping: %s" % source)
            if os.path.isfile(destination):
                os.remove(destination)

print("Aaaaaaand we are done. See you next time.")
print("by thenonameguy <3")
	import os
	import sys
	import urllib
	import urllib2
	import re
	import time

	# Command-line entry: download every full-size image linked from a 4chan
	# thread page into a local folder.
	#
	# Usage: python 4chandown.py <url> <folder>
	#   <url>    - address of the thread page to scan for image links
	#   <folder> - directory the images are saved into (created when missing)

	# Both positional arguments are required.
	if len(sys.argv) < 3:
	    print("Missing parameters.")
	    print("Usage: python 4chandown.py <url> <folder>")
	    # Non-zero status lets a calling shell/script detect the usage error.
	    sys.exit(1)

	threadurl = sys.argv[1]
	subfolder = sys.argv[2]

	# Raw strings keep the regex escapes (\. \w \d) away from Python's own
	# string-escape handling. The alternation uses a plain "|"; an escaped
	# "\|" would only match a literal pipe character and find no images.
	# Host/path part of a full-size image link: 4chan.org/<board>/src/<digits>.<ext>
	exp_imgurl = re.compile(r'4chan\.org/\w+/src/\d+\.(?:jpg|gif|png|jpeg)')
	# Just the "<digits>.<ext>" file name inside such a link.
	exp_picname = re.compile(r'\d+\.(?:jpg|gif|png|jpeg)')

	# The request is sent with a browser-like User-agent header.
	ua = "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.1.4) Gecko/20091007 Firefox/3.5.4"
	head = {'User-agent': ua}

	print("Thread %s going to folder %s" % (threadurl, subfolder))

	print("Fetching html...")

	req = urllib2.Request(threadurl, None, head)

	# Fetch the thread page, retrying exactly once. urllib2.HTTPError is a
	# subclass of urllib2.URLError, so a single handler covers both. A second
	# failure is re-raised so the script stops with the real error.
	response = None
	for attempt in (1, 2):
	    try:
	        response = urllib2.urlopen(req)
	        break
	    except urllib2.URLError:
	        if attempt == 2:
	            raise
	        print("Request failed")

	# Always release the connection, even when reading fails.
	try:
	    msg = response.read()
	finally:
	    response.close()

	print("Received %d bytes" % len(msg))

	imgurls = exp_imgurl.findall(msg)

	# This count includes repeated links; duplicates are removed further down.
	print("Found %d images" % len(imgurls))

	if not os.path.exists(subfolder):
	    print("Folder %s does not exist. Creating..." % subfolder)
	    os.makedirs(subfolder)
	else:
	    print("Folder %s exists. I will just put all files in there." % subfolder)

	# De-duplicate once (the same link can match several times in the page) and
	# sort so the download order is the same on every run.
	unique_imgurls = sorted(set(imgurls))
	totalnumber = len(unique_imgurls)

	for i, img in enumerate(unique_imgurls):
	    source = "http://images." + img
	    filename = exp_picname.search(source).group(0)
	    destination = os.path.join(subfolder, filename)

	    # Files already on disk are never downloaded again.
	    if os.path.isfile(destination):
	        print("File %s exists. Skipping..." % filename)
	        continue

	    print("Downloading %d/%d: %s" % (i + 1, totalnumber, source))
	    try:
	        urllib.urlretrieve(source, destination)
	        # Short pause between downloads; presumably to avoid hammering the
	        # server (the original author left this unexplained).
	        time.sleep(0.25)
	    except urllib.ContentTooShortError:
	        print("Image download failed, retrying...")
	        time.sleep(1)
	        try:
	            urllib.urlretrieve(source, destination)
	            time.sleep(0.5)
	        except urllib.ContentTooShortError:
	            # urlretrieve has already written the incomplete data to
	            # `destination`. Remove it, otherwise the "file exists" check
	            # would skip this image forever on later runs.
	            print("Image download failed again, skipping: %s" % source)
	            if os.path.isfile(destination):
	                os.remove(destination)

	print("Aaaaaaand we are done. See you next time.")
	print("by thenonameguy <3")