-
-
Save windwarrior/ceba826b68674dccf82344dc978683bd to your computer and use it in GitHub Desktop.
Simple Endpoint Crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import datetime | |
import json | |
def crawl_endpoint(endpoint):
    """Download every page of a Guild Wars 2 v2 endpoint into ``<endpoint>.json``.

    Pages are requested one at a time with ``page_size=200``, starting at
    page 0.  The records of all pages are concatenated into a single flat
    list, which is then written as JSON to ``'{}.json'.format(endpoint)`` in
    the current working directory.

    Args:
        endpoint: Path component placed after ``/v2/`` in the request URL,
            e.g. ``'skills'``.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        KeyError: If a response lacks the ``X-Page-Total`` header.
    """
    # Provisional bound so the loop runs at least once; it is replaced by the
    # server-reported page count after every response.
    max_pages = 3
    i = 0
    endpoint_result = []
    while i < max_pages:
        request_string = "https://api.guildwars2.com/v2/{}?page={}&page_size=200".format(endpoint, i)
        result = requests.get(request_string)
        # Stop on an HTTP error instead of treating the error body as a page.
        result.raise_for_status()
        max_pages = int(result.headers['X-Page-Total'])
        # Assumes each page body is a JSON array of records (TODO confirm
        # against the API).  extend() keeps the output a flat list of records;
        # append() would nest one list per page, which a consumer that looks
        # up ``id`` on every top-level element cannot read.
        endpoint_result.extend(result.json())
        i = i + 1
    with open('{}.json'.format(endpoint), 'w') as f:
        json.dump(endpoint_result, f)
if __name__ == "__main__":
    # Crawl the three endpoints, in this order, when run as a script.
    for endpoint_name in ('skills', 'specializations', 'traits'):
        crawl_endpoint(endpoint_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os.path import isfile | |
import web | |
import re | |
import json | |
import abc | |
# web.py routing table: alternating (URL regex, handler class name) entries.
# The trailing ``/?`` makes the final slash optional.
urls = (
    '/v2/skills/?', 'Skills',
    '/v2/traits/?', 'Traits',
    '/v2/specializations/?', 'Specializations'
)

# Validates a complete ``ids`` value: one or more integers separated by single
# commas.  ``match()`` only anchors at the start, so the pattern is anchored at
# the end with ``\Z``; without it inputs such as "1,2abc" or "1,2," would pass.
ids_pattern = re.compile(r'\d+(,\d+)*\Z')

# Extracts the individual integer tokens from an already validated value.
ids_match = re.compile(r'\d+')
class JSONDB:
    """In-memory lookup table of JSON records keyed by their ``id`` field."""

    def __init__(self, filename):
        """Remember the backing file; nothing is read until ``load()``.

        Args:
            filename: Path of the JSON file to read records from.
        """
        self.filename = filename
        self.db = {}

    def load(self):
        """Read the backing file and index every record by its ``id``.

        The file must hold a JSON array.  Each element is either a record
        (an object) or a list of records; the latter covers files written as
        one list per downloaded page.  Records without an ``id`` key are
        stored under ``None``.  Existing entries are kept and overwritten
        only when an id repeats.
        """
        with open(self.filename, 'r') as f:
            items = json.load(f)
        for item in items:
            # Accept both a flat list of records and a list of pages.
            records = item if isinstance(item, list) else [item]
            for record in records:
                self.db[record.get('id')] = record

    def get(self, id):
        """Return the record stored under ``id``, or ``None`` if absent."""
        return self.db.get(id)
class PaginatedCachedFrontend:
    """Base web.py handler that answers ``?ids=...`` lookups from a local store.

    Subclasses supply the store through ``get_db()``; the store only needs a
    ``get(int_id)`` method that returns the record or ``None``.
    """

    # NOTE: ``__metaclass__`` is only honoured by Python 2.  Under Python 3
    # this attribute is inert and the abstract method is not enforced.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def get_db(self):
        """Return the store that this handler reads its records from."""
        pass

    def GET(self):
        """Serve the records named by the ``ids`` query parameter as JSON.

        Returns:
            A JSON array (string) of every requested record that exists in
            the store, in the order the ids were given.

        Raises:
            web.notfound: If ``ids`` is missing or rejected by
                ``ids_pattern``, or if none of the requested ids exist.
        """
        web.header('Content-Type', 'application/json')
        query = web.input()

        if 'ids' not in query or not ids_pattern.match(query.ids):
            raise web.notfound(json.dumps({'text': 'only ids supported by frontend cache'}))

        store = self.get_db()
        found = []
        for token in ids_match.findall(query.ids):
            record = store.get(int(token))
            # Ids that are not in the store are skipped.
            if record is not None:
                found.append(record)

        if not found:
            raise web.notfound(json.dumps({'text': 'all ids provided are invalid'}))
        return json.dumps(found)
class Skills(PaginatedCachedFrontend):
    """Handler named in ``urls`` for ``/v2/skills``."""

    def get_db(self):
        """Return the store published on the ``web`` module as ``skills_db``."""
        skills_store = web.skills_db
        return skills_store
class Traits(PaginatedCachedFrontend):
    """Handler named in ``urls`` for ``/v2/traits``."""

    def get_db(self):
        """Return the store published on the ``web`` module as ``traits_db``."""
        traits_store = web.traits_db
        return traits_store
class Specializations(PaginatedCachedFrontend):
    """Handler named in ``urls`` for ``/v2/specializations``."""

    def get_db(self):
        """Return the store published on the ``web`` module as ``specializations_db``."""
        specializations_store = web.specializations_db
        return specializations_store
if __name__ == "__main__":
    # (attribute set on the ``web`` module, cache file) for each store.
    caches = (
        ('skills_db', 'skills.json'),
        ('traits_db', 'traits.json'),
        ('specializations_db', 'specializations.json'),
    )
    # The server starts only when all three cache files exist in the working
    # directory; otherwise the script exits without doing anything.
    if all(isfile(cache_path) for _, cache_path in caches):
        app = web.application(urls, globals())
        for attr_name, cache_path in caches:
            store = JSONDB(cache_path)
            # Publish the store on the ``web`` module, then fill it.
            setattr(web, attr_name, store)
            store.load()
        app.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment