Skip to content

Instantly share code, notes, and snippets.

@ErikGartner
Created March 19, 2017 10:22
Show Gist options
  • Save ErikGartner/8022223b9ee4dcfc66f4f5f343396af0 to your computer and use it in GitHub Desktop.
Save ErikGartner/8022223b9ee4dcfc66f4f5f343396af0 to your computer and use it in GitHub Desktop.
Whitewolf wikia dumper
import requests
import json
# Base URL of the wiki being dumped. API paths are appended to it directly
# (without inserting a separator), so it must keep its trailing slash.
BASE_URL = 'http://whitewolf.wikia.com/'
def scrape_artifacts(offset='', timeout=30):
    """Fetch a single page of the wiki's article list.

    Args:
        offset: Value sent as the ``offset`` query parameter. The empty
            string (the default) is what the caller uses for the first
            request.
        timeout: Seconds to wait on the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        A ``(items, new_offset)`` tuple. ``items`` is the ``items`` entry
        of the JSON response and ``new_offset`` is its ``offset`` entry,
        or ``False`` when the response carries no ``offset`` key.
        ``(False, False)`` is returned when the HTTP status is not 200.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
        KeyError: If the JSON response has no ``items`` key.
    """
    url = '{}{}'.format(BASE_URL, 'api/v1/Articles/List')
    query = {
        'expand': 1,
        'offset': offset,
    }
    response = requests.get(url, params=query, timeout=timeout)
    if response.status_code != 200:
        return (False, False)

    payload = response.json()
    # A missing 'offset' key is reported as False so callers can stop paging.
    return (payload['items'], payload.get('offset', False))
def scrape_all():
    """Collect the items of every page returned by ``scrape_artifacts``.

    Pages are requested one after another, feeding each returned offset
    into the next call, until either the offset or the items come back as
    ``False``. Progress is printed after every page.

    Returns:
        A list with the items of all pages fetched, in fetch order. If a
        request fails midway, the items gathered so far are returned.
    """
    offset = ''
    full_db = []
    while offset is not False:
        items, offset = scrape_artifacts(offset)
        if items is False:
            # The fetch failed; keep what has been collected so far.
            break
        full_db.extend(items)
        # An empty page has no last element; indexing it would raise
        # IndexError and throw away everything collected up to now.
        last_title = items[-1]['title'] if items else None
        print('Scraping: offset: {}, size: {}, last: {}'.format(
            offset, len(full_db), last_title))
    return full_db
def build_index():
    """Scrape the full article list and save it as ``article_index.json``.

    The file is created in (or overwritten in) the current working
    directory and is written as JSON indented by two spaces.
    """
    articles = scrape_all()
    with open('article_index.json', 'w') as index_file:
        json.dump(obj=articles, fp=index_file, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment