Skip to content

Instantly share code, notes, and snippets.

@WP-LKL
Last active March 13, 2021 17:19
Show Gist options
  • Save WP-LKL/629b1f817a954ef51271cdf027f76b8a to your computer and use it in GitHub Desktop.
Save WP-LKL/629b1f817a954ef51271cdf027f76b8a to your computer and use it in GitHub Desktop.
Scrape images from Tumblr via the Tumblr API (pytumblr). Please consult the website's TOS and robots.txt prior to use.
import pytumblr
import io
def _first_img_src(html):
    """Return the URL of the first ``<img src="...">`` tag in *html*, or None."""
    for fragment in html.split('<'):
        if 'img src=' in fragment:
            # The fragment looks like ``img src="URL" ...>``, so the URL is
            # the first double-quoted piece.
            pieces = fragment.split('"')
            return pieces[1] if len(pieces) > 1 else None
    return None


def get_all_posts(client, blog):
    """Yield one image URL per post of *blog* that carries an image.

    Pages through ``client.posts`` 20 posts at a time until an empty page
    is returned.

    For each post:
      * if it has a non-empty ``photos`` list, the ``original_size`` URL of
        the first photo is yielded;
      * otherwise, if its ``body`` HTML contains an ``<img src="...">`` tag,
        the first such URL is yielded;
      * otherwise the post is skipped (nothing is yielded, so consumers
        never receive ``None``).

    Args:
        client: object exposing ``posts(blog, limit=..., offset=..., ...)``
            and returning a mapping with a ``'posts'`` list (e.g. a
            ``pytumblr.TumblrRestClient``).
        blog: blog identifier passed through to ``client.posts``.

    Yields:
        str: image URLs, in the order the posts are returned.

    Raises:
        KeyError: if a response has no ``'posts'`` key.
    """
    page_size = 20
    offset = 0
    while True:
        # Note: similar for ``client.tagged()`` etc.
        response = client.posts(
            blog,
            limit=page_size,
            offset=offset,
            reblog_info=True,
            notes_info=True,
        )
        posts = response['posts']
        if not posts:
            return
        for post in posts:
            photos = post.get('photos')
            if photos:
                yield photos[0]['original_size']['url']
                continue
            # Posts without photos may still embed an image in their HTML body.
            url = _first_img_src(post.get('body') or '')
            if url:
                yield url
        offset += page_size
# Placeholder values: replace with a real OAuth consumer key and blog identifier.
client = pytumblr.TumblrRestClient('OAuth consumer key:')
blog = 'blogname/prefix'
# A '/' in the blog identifier would be treated as a directory separator in
# the output path (and the open would fail), so flatten it for the file name.
out_path = '{}-posts.txt'.format(blog.replace('/', '_'))
with io.open(out_path, 'w', encoding="utf-8") as out_file:
    for post in get_all_posts(client, blog):
        # Skip empty results so no literal "None" lines end up in the file.
        if post:
            print(post, file=out_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment