Skip to content

Instantly share code, notes, and snippets.

@Natim
Created March 9, 2016 12:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Natim/22bba2335a2892919c44 to your computer and use it in GitHub Desktop.
Save Natim/22bba2335a2892919c44 to your computer and use it in GitHub Desktop.
For each add-on, scrape AMO to get the "who" and "why" information.
# -*- coding: utf-8 -*-
# This is Python 3 only code using asyncio
import asyncio
import aiohttp
from pyquery import PyQuery as pyquery
async def fetch_info(session, record):
    """Add the AMO "why" and "who" block details to a blocklist record.

    Downloads the addons.mozilla.org "blocked" page for the record's
    ``blockID`` and copies the text of the first two ``.blocked dl>dd``
    elements into ``record['why']`` and ``record['who']``.

    Args:
        session: HTTP client session whose ``get(url)`` is usable as an
            async context manager yielding a response with ``status``
            and an awaitable ``text()``.
        record: Blocklist record dict; it is modified in place.

    Returns:
        The same ``record`` object. A record without a ``blockID`` key is
        returned unchanged after a notice is printed.

    Raises:
        ValueError: If the page request does not answer with HTTP 200, or
            if the page does not contain the two expected ``dd`` elements.
    """
    if 'blockID' not in record:
        print("{} doesn't have a blockID".format(record['id']))
        return record

    # 2. For each record, compute the URL of its blocklist page.
    url = "https://addons.mozilla.org/fr/firefox/blocked/{}".format(
        record['blockID'])

    async with session.get(url) as resp:
        if resp.status != 200:
            body = await resp.text()
            raise ValueError('{} — {}'.format(resp.status, body))
        data = await resp.text()

    # Run the selector once and reuse the result for both fields.
    details = pyquery(data)('.blocked dl>dd')
    if len(details) < 2:
        # Fail with the offending URL instead of a bare IndexError.
        raise ValueError(
            'Expected two ".blocked dl>dd" elements at {}, found {}'.format(
                url, len(details)))

    # 5. Update the record with the scraped information.
    record['why'] = details[0].text
    record['who'] = details[1].text
    return record
async def main():
    """Fetch add-on blocklist records from Kinto and enrich them via AMO.

    Requests the first 10 records of the ``blocklists/addons`` collection,
    runs ``fetch_info`` concurrently for every record, and prints the list
    of resulting records.

    Raises:
        ValueError: If the Kinto records request does not answer with
            HTTP 200 (``fetch_info`` errors propagate as well).
    """
    url = ('https://kinto.dev.mozaws.net/v1'
           '/buckets/blocklists/collections/addons/records?_limit=10')

    # The session must be entered with ``async with``; a plain ``with`` is
    # rejected by current aiohttp releases.
    async with aiohttp.ClientSession() as session:
        # 1. Fetch all the records.
        async with session.get(url) as resp:
            # XXX: Handle the Next-Page header
            if resp.status != 200:
                # Read the body as text: an error page may not be JSON, and
                # a decoding failure would hide the real HTTP status.
                body = await resp.text()
                raise ValueError('{} — {}'.format(resp.status, body))
            data = await resp.json()

        records = data['data']
        # 3. Create one fetch_info coroutine per record.
        coros = [fetch_info(session, record) for record in records]
        # 4. Fetch all the pages concurrently.
        results = await asyncio.gather(*coros)

    print(results)
    # 6. TODO: push the modifications back.
if __name__ == '__main__':
    # Script entry point: drive main() to completion on the event loop.
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main())
    finally:
        # Close the loop even when main() raises so it is not left open.
        loop.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment