Skip to content

Instantly share code, notes, and snippets.

@vwood
Created August 1, 2012 03:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vwood/3223505 to your computer and use it in GitHub Desktop.
Save vwood/3223505 to your computer and use it in GitHub Desktop.
random imgur downloader
import os
import sys
import random
import time
import string
import Queue
import threading
from urllib2 import Request, urlopen, URLError, HTTPError
import hashlib
# SHA-224 hex digest of the response body that the downloader treats as
# "image not found"; a body hashing to this value is discarded and retried.
IMAGE_NOT_FOUND_SHA224 = "ed27658fa7d5a375be00ea3a4669687eef1b12a0906b1495997f114b"
# Alphabet used to build random image names. ascii_letters is used rather
# than string.letters because the latter is locale-dependent (and absent in
# Python 3), while image names must be exactly [a-zA-Z0-9].
CHARS = string.ascii_letters + string.digits
# Work queue shared by the worker threads. The queued values carry no data:
# each item simply stands for one picture that a worker should fetch.
num_q = Queue.Queue()
# functions
def rand_string(string_length):
    """Return a random string of string_length characters drawn from CHARS."""
    picks = (random.choice(CHARS) for _ in range(string_length))
    return ''.join(picks)
#class
class ThreadGet(threading.Thread):
    """Worker thread that downloads one random imgur image per queue item.

    Every item taken from the queue is a request for a single picture; the
    value of the item itself is ignored.
    """

    def __init__(self, queue):
        """Remember the shared work queue.

        queue -- object providing get() and task_done(), e.g. a Queue.Queue.
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def get_images(self, num_pics):
        """Download num_pics random images into the 'output' directory.

        The directory is created under the current working directory if it
        does not exist yet. Random five-character image names are tried
        until one yields a real image, so this call blocks until num_pics
        files have been written.

        Raises OSError if the output directory cannot be created.
        """
        # The trailing '' keeps a trailing path separator so file names can
        # simply be appended, exactly as the printed path suggests.
        path = os.path.join(os.getcwd(), 'output', '')
        try:
            os.makedirs(path)
        except OSError:
            # Several workers start at once, so another thread may have
            # created the directory first; only a genuinely missing
            # directory is an error.
            if not os.path.isdir(path):
                raise
        print('saving to: ' + path)
        for _ in range(num_pics):
            # Keep drawing random names until one download succeeds.
            while not self._try_download(path):
                pass

    def _try_download(self, path):
        """Try a single random image name; return True if a file was saved."""
        img_name = rand_string(5)
        url = "http://i.imgur.com/" + img_name + ".jpg"
        target = path + img_name + '.jpg'
        try:
            response = urlopen(Request(url))
        except HTTPError as err:
            # HTTPError is a URLError subclass, so it must be handled first.
            print("HTTP Error: %s %s" % (err.code, url))
            print('trying again...')
            return False
        except URLError as err:
            print("URL Error: %s %s" % (err.reason, url))
            print('trying again...')
            return False
        print("downloading " + url)
        try:
            try:
                contents = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
            if hashlib.sha224(contents).hexdigest() == IMAGE_NOT_FOUND_SHA224:
                print('Image not found')
                print('trying again...')
                return False
            with open(target, "wb") as local_file:
                local_file.write(contents)
        except Exception as err:
            # Best effort: report the failure and let the caller retry with
            # a new random name.
            print("%s %s" % (err, target))
            return False
        return True

    def run(self):
        """Serve the queue forever, fetching one picture per item."""
        while True:
            # The item's value is irrelevant; it only signals that one more
            # picture is wanted.
            self.queue.get()
            try:
                self.get_images(1)
            except Exception as err:
                # Keep the worker alive so remaining items still get served.
                print('worker error: %s' % err)
            finally:
                # Always acknowledge the item; otherwise a join() on the
                # queue would block forever after a failure.
                self.queue.task_done()
# main
if __name__ == '__main__':
    # Usage: random_imgur.py <how_many>; five pictures when no argument is given.
    start_time = time.time()
    parsed_number = ''.join(sys.argv[1:])
    HOW_MANY = int(parsed_number) if parsed_number else 5
    print('getting %s random pics' % str(HOW_MANY))

    # Start a fixed pool of daemon workers that all share the same queue.
    for _ in range(5):
        worker = ThreadGet(num_q)
        worker.daemon = True
        worker.start()

    # One queue item per requested picture.
    for n in range(HOW_MANY):
        num_q.put(n)

    # Block until every queued item has been acknowledged by a worker.
    num_q.join()
    print('done!')
    elapsed = round(time.time() - start_time, 2)
    print('completed in: ' + str(elapsed) + ' seconds')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment