Skip to content

Instantly share code, notes, and snippets.

@jself
Created August 15, 2012 02:00
Show Gist options
  • Save jself/3354803 to your computer and use it in GitHub Desktop.
Save jself/3354803 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
import urllib2
import sys
import re
import threading
from hashlib import md5
# Protocol-relative links to images hosted on images.4chan.org.  The dots are
# escaped so that they match a literal '.' only; unescaped, '.jpg' would also
# accept e.g. 'xjpg' and the host part would accept look-alike host names.
_IMAGE_URL_RE = re.compile(
    r'//images\.4chan\.org/[^"]*\.(?:jpg|jpeg|bmp|gif|png)')


def get_images(url):
    """Return every image link found in the page at *url*.

    The page is fetched with urllib2 and scanned for links of the form
    ``//images.4chan.org/...<ext>`` where ``<ext>`` is one of jpg, jpeg, bmp,
    gif or png.

    Args:
        url: address of the page to scan.

    Returns:
        A list of the matched strings in page order.  Each entry starts with
        ``//`` (no scheme) and the list may contain duplicates.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise on failure.
    """
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Python 2 urllib2 responses are not context managers, so the
        # connection is released explicitly even when read() fails.
        response.close()
    return _IMAGE_URL_RE.findall(page)
def download(url, folder = ''):
    """Download *url* into *folder* and print the MD5 digest of its content.

    The local file is named after the last ``/``-separated segment of *url*.

    Args:
        url: address of the resource to fetch.
        folder: destination directory; the empty string (the default) means
            the current working directory.  A trailing ``/`` is optional.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise on network failure, and
        ``IOError`` if the destination file cannot be written.
    """
    # Function-scope import: this function is the only user of os here.
    import os

    # os.path.join adds the separator only when one is needed and returns
    # the bare file name when folder is ''.
    target = os.path.join(folder, url.split('/')[-1])

    # Read the whole body before touching the disk so that a failed download
    # does not leave an empty file behind.
    web_file = urllib2.urlopen(url)
    try:
        data = web_file.read()
    finally:
        web_file.close()

    # Single-argument parenthesised print behaves exactly like the Python 2
    # print statement.
    print("Digest: " + md5(data).hexdigest())

    # 'wb': the payload is binary image data; text mode would translate
    # newline bytes on some platforms and corrupt the file.
    with open(target, 'wb') as local_file:
        local_file.write(data)
class Download(threading.Thread):
    """Thread that fetches one URL by delegating to ``download``."""

    def __init__(self, url, folder = ''):
        """Remember what to fetch (*url*) and where to store it (*folder*)."""
        super(Download, self).__init__()
        self.folder = folder
        self.url = url

    def run(self):
        """Thread body: perform the download with the stored arguments."""
        target_url, target_folder = self.url, self.folder
        download(target_url, target_folder)
if __name__ == '__main__':
    # Usage: script URL [directory]
    if len(sys.argv) in (2, 3):
        # The optional second argument is the destination folder; '' is the
        # same value Download uses when no folder is given.
        folder = sys.argv[2] if len(sys.argv) == 3 else ''

        seen = set()   # links already queued; set membership is O(1)
        threads = []
        for link in get_images(sys.argv[1]):
            if link in seen:
                continue
            seen.add(link)
            # Links come back without a scheme ('//host/...'); add one.
            image_url = 'https:%s' % link
            print(image_url)
            # NOTE: one thread is started per unique image, with no upper
            # bound on the number of concurrent downloads.
            thread = Download(image_url, folder)
            thread.start()
            threads.append(thread)

        # Wait for every download to finish before exiting.
        for thread in threads:
            thread.join()
    else:
        import os
        script = os.path.basename(sys.argv[0])
        print('This is a script to download images from 4chan.')
        print('usage: %s http://server.com/ [directory]' % script)
        print('usage: %s http://server.com/' % script)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment