Last active
August 29, 2015 14:10
-
-
Save mahendrakalkura/ab05517b187bfb16edef to your computer and use it in GitHub Desktop.
For http://kittenskittenskittens.tumblr.com/: python tumblr.py kittenskittenskittens
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
from multiprocessing import JoinableQueue, Process
from os import makedirs
from os.path import dirname, isdir, isfile, join
from sys import argv, exit, stderr, stdout
from time import sleep
from traceback import print_exc

from requests import get
from simplejson import loads
class worker(Process):
    """Process that runs callables taken from a shared queue.

    The queue is passed as the ``tasks`` keyword argument and must provide
    ``get()`` and ``task_done()``.  Each item is either a zero-argument
    callable, which is invoked, or the string ``'STOP'``, which ends the loop.
    """

    def __init__(self, *args, **kwargs):
        """Store the queue given as ``tasks``; raises KeyError if it is absent."""
        super(worker, self).__init__()
        self.tasks = kwargs['tasks']

    def run(self):
        """Consume queue items until the ``'STOP'`` sentinel is received."""
        while True:
            job = self.tasks.get()
            try:
                if job == 'STOP':
                    break
                job()
            except Exception:
                # One failing job must not take the whole worker down:
                # report it on stderr and move on to the next item.
                print_exc()
            finally:
                # Acknowledge every item exactly once, including the
                # sentinel and jobs that raised.
                self.tasks.task_done()
class task(object):
    """Callable that downloads one URL to disk unless it is already there.

    Keyword arguments (all required; a missing one raises KeyError):
        path_1: file the response body is written to.
        path_2: empty marker file created right after ``path_1`` is written.
        url: address to fetch.
    """

    # Seconds to wait on the server before giving up on a request; without
    # a timeout a stalled connection would block the caller indefinitely.
    timeout = 60

    def __init__(self, *args, **kwargs):
        self.path_1 = kwargs['path_1']
        self.path_2 = kwargs['path_2']
        self.url = kwargs['url']

    def __call__(self):
        """Download if neither file exists, then print ``path_1``."""
        if not isfile(self.path_1) and not isfile(self.path_2):
            self.download()
        # A parenthesised single argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(self.path_1)
        stdout.flush()

    def download(self):
        """Fetch ``url`` and write ``path_1`` plus the empty ``path_2``.

        Best effort: a request error or a non-200 status leaves both files
        untouched and returns silently.
        """
        try:
            response = get(self.url, timeout=self.timeout)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop the process.
            return
        if response.status_code != 200:
            return
        with open(self.path_1, 'wb') as resource:
            resource.write(response.content)
        # Opening in 'wb' mode is enough to create the empty marker file.
        with open(self.path_2, 'wb'):
            pass
        stdout.flush()
WORKER_COUNT = 25      # number of download processes
PAGE_SIZE = 50         # posts requested per API call
REQUEST_TIMEOUT = 60   # seconds to wait for one API response
MAX_ATTEMPTS = 5       # tries per page before giving up
RETRY_DELAY = 5        # seconds to pause between tries


def fetch_posts(name, start, num):
    """Return the ``posts`` list of one page of photo posts.

    Requests ``num`` posts beginning at offset ``start`` from the blog
    ``name``.  Returns None when every attempt failed (request error,
    non-200 status or an unparsable body).
    """
    for attempt in range(MAX_ATTEMPTS):
        if attempt:
            # Pause before a retry instead of hammering the server.
            sleep(RETRY_DELAY)
        try:
            response = get(
                'http://%(name)s.tumblr.com/api/read/json' % {
                    'name': name,
                },
                params={
                    'num': num,
                    'start': start,
                    'type': 'photo',
                },
                timeout=REQUEST_TIMEOUT
            )
        except Exception:
            continue
        if response.status_code != 200:
            continue
        body = response.text
        try:
            # The JSON object is wrapped in a JavaScript assignment; take
            # everything from the first '{' to the last '}' rather than
            # relying on fixed offsets.
            contents = loads(body[body.index('{'):body.rindex('}') + 1])
            return contents['posts']
        except (KeyError, TypeError, ValueError):
            continue
    return None


def photo_urls(post):
    """Return every ``photo-url-1280`` value of ``post``, top level first."""
    urls = []
    if 'photo-url-1280' in post:
        urls.append(post['photo-url-1280'])
    for photo in post.get('photos', []):
        if 'photo-url-1280' in photo:
            urls.append(photo['photo-url-1280'])
    return urls


def photo_paths(directory, post_id, index, url):
    """Return ``(path_1, path_2)``: the photo file and its marker file.

    The file name is ``<post_id>.<index>.<extension>`` where the extension
    is whatever follows the last dot of ``url``; the marker file has the
    same name wrapped in a leading and a trailing dot.
    """
    stem = '%(id)s.%(index)s.%(extension)s' % {
        'extension': url.split('.')[-1],
        'id': post_id,
        'index': index,
    }
    return join(directory, stem), join(directory, '.%s.' % stem)


def main():
    """Queue every photo of the blog named in ``argv[1]`` for download."""
    if len(argv) != 2:
        # A string argument is printed to stderr and yields exit status 1.
        exit('Usage: python %s <tumblr-name>' % argv[0])
    name = argv[1]
    # Create the same directory the files are written to; creating ``name``
    # relative to the working directory breaks when the script is started
    # from somewhere else.
    directory = join(dirname(__file__), name)
    if not isdir(directory):
        makedirs(directory)
    tasks = JoinableQueue()
    workers = [worker(tasks=tasks) for _ in range(WORKER_COUNT)]
    for process in workers:
        process.start()
    start = 0
    try:
        while True:
            posts = fetch_posts(name, start, PAGE_SIZE)
            if posts is None:
                stderr.write(
                    'Giving up on %s at offset %d after %d attempts\n'
                    % (name, start, MAX_ATTEMPTS)
                )
                break
            if not posts:
                break
            for post in posts:
                for index, url in enumerate(photo_urls(post), 1):
                    path_1, path_2 = photo_paths(
                        directory, post['id'], index, url
                    )
                    tasks.put(task(**{
                        'path_1': path_1,
                        'path_2': path_2,
                        'url': url,
                    }))
            start += PAGE_SIZE
    finally:
        # Always release the workers, even if paging failed half-way;
        # otherwise they would block on the queue forever.
        for _ in workers:
            tasks.put('STOP')
        for process in workers:
            process.join()


# Guard the entry point so that importing this module (which child
# processes do on platforms that spawn instead of fork) does not start
# another round of workers.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment