Skip to content

Instantly share code, notes, and snippets.

@vitorio
Forked from igorbrigadir/scrape-storify-stories.py
Last active December 28, 2017 22:47
Show Gist options
  • Save vitorio/be7919835f54c290957b4bb97ee8bd7b to your computer and use it in GitHub Desktop.
Save vitorio/be7919835f54c290957b4bb97ee8bd7b to your computer and use it in GitHub Desktop.
Download all Storify stories for a given user
#!/usr/bin/python
import argparse as ap
import math
import os
import sys

import requests

# Prefer simplejson when it is installed; otherwise use the stdlib json module.
try:
    import simplejson as json
except ImportError:
    import json
STORIFY_API_KEY = ''  # Your Storify API Key: http://dev.storify.com/request
API_URL = 'https://api.storify.com/v1'
PER_PAGE = 50  # page size sent as per_page on every paginated request


def page_count(total_items, per_page=PER_PAGE):
    """Return how many page numbers (counted from 0) to request.

    The result is ``ceil(total_items / per_page) + 1``, so callers iterating
    ``range(page_count(n))`` request pages ``0 .. ceil(n / per_page)``.
    The division is forced to floating point so the result is the same on
    interpreters where ``/`` between two ints truncates.

    NOTE(review): the extra page presumably covers both 0-based and 1-based
    page numbering on the API side -- confirm against the Storify API docs.
    """
    return int(math.ceil(total_items / float(per_page))) + 1


def _get_json(url):
    """GET *url* and return the decoded JSON body.

    Raises requests.HTTPError on a 4xx/5xx status instead of letting the
    failure surface later as a KeyError on the missing 'content' key.
    """
    response = requests.get(url)
    response.raise_for_status()
    return response.json()


def _fetch_paged(base_url, total_items, key):
    """Return the concatenated ``content[key]`` lists from every page of *base_url*."""
    items = []
    for page in range(page_count(total_items)):
        url = "{0}?api_key={1}&per_page={2}&page={3}".format(
            base_url, STORIFY_API_KEY, PER_PAGE, page)
        items.extend(_get_json(url)['content'][key])
    return items


def _ensure_dir(path):
    """Create directory *path* (and parents); do nothing if it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory is really there.
        if not os.path.isdir(path):
            raise


def main(argv=None):
    """Download a Storify user's profile and all of their stories as JSON.

    Writes ``<user>.json`` to the current directory and one
    ``<user>/<story-slug>.json`` file per story, each story carrying its
    downloaded elements under the ``'elements'`` key.

    argv: argument list without the program name; ``None`` means ``sys.argv[1:]``.
    Exits with status 2 when no screen name is given.
    """
    parser = ap.ArgumentParser(description='Downloads storifyuser.json to current directory & all Storify collections to storifyuser/storify-slug.json')
    parser.add_argument('STORIFY_SCREEN_NAME', nargs='?')
    args = parser.parse_args(argv)
    if args.STORIFY_SCREEN_NAME is None:
        print('usage: python scrape-storify-stories.py <storify_screen_name>')
        sys.exit(2)

    target_user = args.STORIFY_SCREEN_NAME
    print('Processing {0}'.format(target_user))

    # eg: https://api.storify.com/v1/users/reportedly?&api_key=...
    user_url = "{0}/{1}/{2}?api_key={3}".format(API_URL, 'users', target_user, STORIFY_API_KEY)
    user_json = _get_json(user_url)
    with open("{0}.json".format(target_user), 'w') as f:
        json.dump(user_json, f)

    number_of_stories = user_json['content']['stats']['stories']
    print('{0} has {1} stories on {2} pages:'.format(
        target_user, number_of_stories, page_count(number_of_stories)))

    stories_url = "{0}/{1}/{2}".format(API_URL, 'stories', target_user)
    stories = _fetch_paged(stories_url, number_of_stories, 'stories')
    print('Retrieved info for {0} stories. Downloading elements...'.format(len(stories)))

    # The per-story files below are written inside this directory.
    _ensure_dir(target_user)

    total = len(stories)  # denominator matches what is actually iterated
    for done, story in enumerate(stories, 1):
        slug = story['slug']
        number_of_elements = sum(story['stats']['elements'].values())
        # Trailing slash kept: the elements endpoint is .../stories/<user>/<slug>/
        story_url = "{0}/{1}/{2}/{3}/".format(API_URL, 'stories', target_user, slug)
        elements = _fetch_paged(story_url, number_of_elements, 'elements')
        story['elements'] = elements
        with open(os.path.join(target_user, "{0}.json".format(slug)), 'w') as f:
            json.dump(story, f)
        print('{0:.2f}% done. Saved {1} with {2} elements...'.format(
            (done / float(total)) * 100.0, slug, len(elements)))

    print('Finished: {0}'.format(target_user))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment