Skip to content

Instantly share code, notes, and snippets.

@windwarrior
Last active October 27, 2017 19:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save windwarrior/ceba826b68674dccf82344dc978683bd to your computer and use it in GitHub Desktop.
Save windwarrior/ceba826b68674dccf82344dc978683bd to your computer and use it in GitHub Desktop.
Simple Endpoint Crawler
import requests
import datetime
import json
def crawl_endpoint(endpoint):
    """Download every page of a Guild Wars 2 v2 API endpoint to ``<endpoint>.json``.

    Pages are requested with ``page_size=200`` starting at page 0. The number
    of pages to fetch is read from the ``X-Page-Total`` header of each
    response. The items of all pages are written as ONE flat JSON list (not a
    list of pages), which is the shape a consumer needs when it iterates the
    file and reads each item's ``id``.

    Args:
        endpoint: Path segment after ``/v2/`` (e.g. ``'skills'``); also used
            as the base name of the output file.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        KeyError: If a response carries no ``X-Page-Total`` header.
    """
    # Placeholder until the first response reports the real page count.
    max_pages = 3
    i = 0
    endpoint_result = []
    while i < max_pages:
        request_string = "https://api.guildwars2.com/v2/{}?page={}&page_size=200".format(endpoint, i)
        result = requests.get(request_string)
        # Fail loudly instead of storing an error payload in the cache file.
        result.raise_for_status()
        max_pages = int(result.headers['X-Page-Total'])
        # Each page body is a list of items: extend (not append) keeps the
        # output flat instead of nesting one list per page.
        endpoint_result.extend(result.json())
        i = i + 1

    with open('{}.json'.format(endpoint), 'w') as f:
        json.dump(endpoint_result, f)
if __name__ == "__main__":
    # Crawl the endpoints one after another; each call writes its own
    # ``<endpoint>.json`` file.
    for endpoint_name in ('skills', 'specializations', 'traits'):
        crawl_endpoint(endpoint_name)
from os.path import isfile
import web
import re
import json
import abc
# Routing table handed to ``web.application``: alternating pairs of URL regex
# and the name of the handler class. The trailing ``/?`` accepts each path
# with or without a final slash.
urls = (
    '/v2/skills/?', 'Skills',
    '/v2/traits/?', 'Traits',
    '/v2/specializations/?', 'Specializations',
)

# Validates the complete ``ids`` query value: one or more integers separated
# by single commas. The pattern is applied with ``.match()``, which anchors
# only at the start, so the explicit ``\Z`` is what rejects trailing garbage
# such as ``1,2abc`` or ``1,2,``.
ids_pattern = re.compile(r'\d+(,\d+)*\Z')

# Extracts the individual integers from an already validated ``ids`` value.
ids_match = re.compile(r'\d+')
class JSONDB:
    """In-memory lookup table of JSON objects, keyed by each object's ``id``."""

    def __init__(self, filename):
        """Remember the JSON file to read; nothing is loaded until ``load()``.

        Args:
            filename: Path of the JSON file holding the items.
        """
        self.filename = filename
        self.db = {}

    def load(self):
        """Read the JSON file and index every item by its ``id`` field.

        Two file layouts are accepted: a flat list of item objects, and a
        list of pages where each page is itself a list of item objects.
        Without the second case a page-nested file fails with
        ``AttributeError`` because a list has no ``get``.
        """
        with open(self.filename, 'r') as f:
            items = json.load(f)

        for item in items:
            # A nested list is one page of items; a dict is a single item.
            records = item if isinstance(item, list) else [item]
            for record in records:
                self.db[record.get('id')] = record

    def get(self, id):
        """Return the item stored under ``id``, or ``None`` if there is none."""
        return self.db.get(id)
class PaginatedCachedFrontend:
    """Base request handler that answers ``?ids=...`` lookups from a local database.

    Subclasses supply the database through ``get_db()``; ``GET`` validates the
    query, resolves each requested id and returns the matches as JSON.
    """

    # NOTE: ``__metaclass__`` is only honoured by Python 2. On Python 3 the
    # attribute is inert, so ``get_db`` is not enforced as abstract there.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def get_db(self):
        """Return the database object whose ``get(id)`` resolves an integer id."""
        pass

    def GET(self):
        """Serve the items named by the ``ids`` query parameter.

        Returns:
            A JSON array (as a string) of every requested item found in the
            database; ids without an entry are skipped.

        Raises:
            web.notfound: If ``ids`` is absent or rejected by ``ids_pattern``,
                or if none of the requested ids has an entry.
        """
        web.header('Content-Type', 'application/json')
        user_data = web.input()
        if not ('ids' in user_data and ids_pattern.match(user_data.ids)):
            raise web.notfound(json.dumps({'text': 'only ids supported by frontend cache'}))

        # Resolve the database once instead of once per requested id.
        db = self.get_db()
        looked_up = (db.get(int(raw_id)) for raw_id in ids_match.findall(user_data.ids))
        found = [item for item in looked_up if item is not None]
        if found:
            return json.dumps(found)
        raise web.notfound(json.dumps({'text': 'all ids provided are invalid'}))
class Skills(PaginatedCachedFrontend):
    """Handler named ``'Skills'`` in the ``urls`` table (``/v2/skills``)."""

    def get_db(self):
        """Return the database stored as ``web.skills_db``."""
        skills_db = web.skills_db
        return skills_db
class Traits(PaginatedCachedFrontend):
    """Handler named ``'Traits'`` in the ``urls`` table (``/v2/traits``)."""

    def get_db(self):
        """Return the database stored as ``web.traits_db``."""
        traits_db = web.traits_db
        return traits_db
class Specializations(PaginatedCachedFrontend):
    """Handler named ``'Specializations'`` in the ``urls`` table (``/v2/specializations``)."""

    def get_db(self):
        """Return the database stored as ``web.specializations_db``."""
        specializations_db = web.specializations_db
        return specializations_db
if __name__ == "__main__":
    # The server answers only from local cache files, so refuse to start,
    # with a visible reason, when any of them is missing. Previously the
    # script exited silently with status 0 in that case.
    required_files = ('skills.json', 'traits.json', 'specializations.json')
    missing_files = [name for name in required_files if not isfile(name)]
    if missing_files:
        raise SystemExit('missing cache file(s): {}'.format(', '.join(missing_files)))

    app = web.application(urls, globals())

    # The handler classes read these databases back as attributes of the
    # ``web`` module, so they must be in place before the app starts serving.
    web.skills_db = JSONDB('skills.json')
    web.skills_db.load()
    web.traits_db = JSONDB('traits.json')
    web.traits_db.load()
    web.specializations_db = JSONDB('specializations.json')
    web.specializations_db.load()

    app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment