Created
March 9, 2016 12:30
-
-
Save Natim/22bba2335a2892919c44 to your computer and use it in GitHub Desktop.
For each add-on, scrape AMO to get the "who" and "why" information.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# This is Python 3 only code using asyncio
import asyncio | |
import aiohttp | |
from pyquery import PyQuery as pyquery | |
async def fetch_info(session, record):
    """Add the AMO "why" and "who" blocklist details to ``record``.

    The record's ``blockID`` is used to build the URL of its page under
    https://addons.mozilla.org/fr/firefox/blocked/. The text of the first
    two ``.blocked dl>dd`` elements of that page is stored in
    ``record['why']`` and ``record['who']`` respectively.

    Args:
        session: object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``text()``
            (``main()`` passes an ``aiohttp.ClientSession``).
        record: dict describing one add-on; it is modified in place.

    Returns:
        The same ``record`` object. It is returned unchanged (after printing
        a message) when it has no ``blockID`` key.

    Raises:
        ValueError: if the page does not answer with status 200, or if it
            does not contain the two expected ``<dd>`` entries.
    """
    if 'blockID' not in record:
        print("{} doesn't have a blockID".format(record['id']))
        return record

    # 2. For each record, compute the URL of its blocklist page.
    url = "https://addons.mozilla.org/fr/firefox/blocked/{}".format(
        record['blockID'])

    async with session.get(url) as resp:
        if resp.status != 200:
            body = await resp.text()
            raise ValueError('{} — {}'.format(resp.status, body))
        data = await resp.text()

    # Evaluate the selector once and reuse the matches for both fields.
    details = pyquery(data)('.blocked dl>dd')
    if len(details) < 2:
        # Fail with the offending URL instead of a bare IndexError when the
        # page layout is not the expected one.
        raise ValueError(
            '{} — expected 2 <dd> entries in ".blocked dl", found {}'.format(
                url, len(details)))

    # 5. Update the record with the scraped info.
    # NOTE(review): ``.text`` looks like it only returns the text preceding
    # the first child element of each <dd> — confirm that nested markup
    # (e.g. links) does not need to be captured too.
    record['why'] = details[0].text
    record['who'] = details[1].text
    return record
async def main():
    """Fetch blocklisted add-on records from Kinto and enrich them via AMO.

    Downloads a single page of records (``_limit=10``; the Next-Page header
    is not followed yet), runs ``fetch_info()`` concurrently for every
    record and prints the resulting list. Pushing the modified records back
    (step 6) is not implemented.

    Raises:
        ValueError: if the Kinto server answers with a non-200 status.
            Exceptions raised by ``fetch_info()`` propagate through
            ``asyncio.gather``.
    """
    url = ('https://kinto.dev.mozaws.net/v1'
           '/buckets/blocklists/collections/addons/records?_limit=10')

    # ClientSession is an asynchronous context manager, so it must be
    # entered with ``async with`` rather than a plain ``with``.
    async with aiohttp.ClientSession() as session:
        # 1. Fetch all the records.
        async with session.get(url) as resp:
            # XXX: Handle the Next-Page header
            if resp.status != 200:
                # Read the error body as text: it is not guaranteed to be
                # JSON, and a decoding failure would hide the status code.
                body = await resp.text()
                # The original formatted an undefined ``response`` name
                # here, which raised NameError instead of this ValueError.
                raise ValueError('{} — {}'.format(resp.status, body))
            data = await resp.json()
        records = data['data']

        # 3. Create one fetch_info coroutine per record.
        coros = [fetch_info(session, record) for record in records]

        # 4. Fetch all the pages concurrently.
        results = await asyncio.gather(*coros)
        print(results)

        # 6. Push the modifications (not implemented yet).
if __name__ == '__main__':
    # When executed as a script, drive main() to completion on the
    # current event loop.
    asyncio.get_event_loop().run_until_complete(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment