Skip to content

Instantly share code, notes, and snippets.

@mahendrakalkura
Last active August 29, 2015 14:10
Show Gist options
  • Save mahendrakalkura/ab05517b187bfb16edef to your computer and use it in GitHub Desktop.
Save mahendrakalkura/ab05517b187bfb16edef to your computer and use it in GitHub Desktop.
Usage example: to fetch the photos posted on http://kittenskittenskittens.tumblr.com/, run `python tumblr.py kittenskittenskittens`.
# -*- coding: utf-8 -*-
from multiprocessing import JoinableQueue, Process
from os import makedirs
from os.path import dirname, isdir, isfile, join
from requests import get
from simplejson import loads
from sys import argv, exit, stdout
class worker(Process):
    """Process that executes callables pulled from a shared task queue.

    Keyword arguments:
        tasks: queue-like object providing ``get()`` and ``task_done()``
            (the script passes a ``multiprocessing.JoinableQueue``). Each
            item is either a zero-argument callable or the string ``'STOP'``,
            which tells the worker to exit its loop.

    Raises:
        KeyError: if the ``tasks`` keyword argument is missing.
    """

    def __init__(self, *args, **kwargs):
        super(worker, self).__init__()
        self.tasks = kwargs['tasks']

    def run(self):
        """Consume the queue until the ``'STOP'`` sentinel is received."""
        # Imported here so the class does not need a new file-level import.
        import traceback

        while True:
            job = self.tasks.get()
            try:
                if job == 'STOP':
                    break
                job()
            except Exception:
                # A single failing job must not take the whole worker down;
                # report it and carry on with the next item.
                traceback.print_exc()
            finally:
                # Acknowledge every item exactly once (the sentinel and
                # failed jobs included) so the queue's unfinished-task
                # count stays accurate.
                self.tasks.task_done()
class task(object):
    """Callable unit of work that saves one URL to disk.

    Keyword arguments:
        path_1: destination file for the downloaded content.
        path_2: marker file, written empty after a successful download. The
            download is skipped when either ``path_1`` or ``path_2`` already
            exists.
        url: address to fetch.

    Raises:
        KeyError: if any of the three keyword arguments is missing.
    """

    def __init__(self, *args, **kwargs):
        self.path_1 = kwargs['path_1']
        self.path_2 = kwargs['path_2']
        self.url = kwargs['url']

    def __call__(self):
        """Download if neither file exists yet, then print ``path_1``."""
        if not isfile(self.path_1) and not isfile(self.path_2):
            self.download()
        # The path is printed whether or not a download took place.
        # Single-argument call form: same output on Python 2, valid on 3.
        print(self.path_1)
        stdout.flush()

    def download(self):
        """Fetch ``url``; on an HTTP 200 write ``path_1`` and the marker.

        Best effort: a failed request or a non-200 answer leaves the disk
        untouched and returns silently.
        """
        try:
            response = get(self.url)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop the process.
            return
        if not response or response.status_code != 200:
            return
        with open(self.path_1, 'wb') as resource:
            resource.write(response.content)
        with open(self.path_2, 'wb') as resource:
            # Empty marker; bytes literal because the file is opened binary.
            resource.write(b'')
        stdout.flush()
def extract_json(text):
    """Return the JSON object embedded in a JSONP-style response body.

    The endpoint answers with ``var tumblr_api_read = {...};``; locating the
    outermost braces avoids depending on the exact prefix/suffix lengths.

    Raises:
        ValueError: if *text* contains no ``{...}`` span.
    """
    first = text.find('{')
    last = text.rfind('}')
    if first == -1 or last < first:
        raise ValueError('no JSON object found in response')
    return text[first:last + 1]


def photo_urls(post):
    """Return the ``photo-url-1280`` values of *post*.

    The post's own URL (if any) comes first, followed by the URLs of the
    entries under ``photos`` in their original order.
    """
    urls = []
    if 'photo-url-1280' in post:
        urls.append(post['photo-url-1280'])
    for photo in post.get('photos') or []:
        if 'photo-url-1280' in photo:
            urls.append(photo['photo-url-1280'])
    return urls


def fetch_page(name, start, num):
    """Fetch one page of photo posts; return the decoded dict or ``None``.

    ``None`` is returned when the request fails, the status is not 200, or
    the body cannot be decoded.
    """
    try:
        response = get(
            'http://%(name)s.tumblr.com/api/read/json' % {
                'name': name,
            },
            params={
                'num': num,
                'start': start,
                'type': 'photo',
            }
        )
    except Exception:
        return None
    if not response or response.status_code != 200:
        return None
    try:
        return loads(extract_json(response.text))
    except ValueError:
        return None


def main(name, num=50, processes=25, max_failures=5, delay=2):
    """Queue every photo of the blog *name* for download by worker processes.

    Arguments:
        name: blog name; also the name of the folder the files go into.
        num: posts requested per page.
        processes: number of worker processes to start.
        max_failures: consecutive failed page requests tolerated before
            giving up.
        delay: seconds to wait before retrying a failed page request.
    """
    # Local imports: these names are not part of the file-level imports.
    from sys import stderr
    from time import sleep

    # Create the very directory the files are written to, so running the
    # script from another working directory does not break the downloads.
    directory = join(dirname(__file__), name)
    if not isdir(directory):
        makedirs(directory)

    tasks = JoinableQueue()
    workers = [worker(tasks=tasks) for _ in range(processes)]
    for process in workers:
        process.start()

    try:
        start = 0
        failures = 0
        while True:
            contents = fetch_page(name, start, num)
            if contents is None:
                failures += 1
                if failures >= max_failures:
                    stderr.write(
                        'giving up on %s after %d consecutive failed '
                        'requests\n' % (name, failures)
                    )
                    break
                sleep(delay)
                continue
            failures = 0
            posts = contents['posts']
            if not posts:
                # An empty page means the end of the blog was reached.
                break
            for post in posts:
                for index, url in enumerate(photo_urls(post), 1):
                    filename = '%(id)s.%(index)s.%(extension)s' % {
                        'extension': url.split('.')[-1],
                        'id': post['id'],
                        'index': index,
                    }
                    tasks.put(task(**{
                        'path_1': join(directory, filename),
                        # Marker file: same name wrapped in leading and
                        # trailing dots.
                        'path_2': join(directory, '.%s.' % filename),
                        'url': url,
                    }))
            start += num
    finally:
        # Always release the workers, even if paging failed, otherwise they
        # would block on the queue forever.
        for _ in workers:
            tasks.put('STOP')
        for process in workers:
            process.join()


# The guard keeps child processes from re-running the script body on
# platforms where multiprocessing re-imports the main module.
if __name__ == '__main__':
    if len(argv) != 2:
        exit('usage: python %s <tumblr-blog-name>' % argv[0])
    main(argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment