Skip to content

Instantly share code, notes, and snippets.

@sergiolucero
Created October 8, 2019 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sergiolucero/75a14c2dd82a0c911bc7b7d5f366d321 to your computer and use it in GitHub Desktop.
asyncio scraping geocgr
import pickle, time
import asyncio
import concurrent.futures
import requests
# Contraloría (Chile) geocgr API endpoint template; %05d is the
# zero-padded comuna code substituted in get_comunas().
url_base='https://www.contraloria.cl/opencgrapp/geocgr/api/comunas/%05d/newobras'
def get_comunas(region_id):
    """Fetch the 'newobras' JSON for every comuna of one region.

    Reads the comuna codes from ``chalicelib/R<region_id>.csv``
    (whitespace-separated), fetches each comuna's payload from the
    Contraloría geocgr API, pickles the collected list to
    ``data_<region_id>.pk`` and returns it.

    Parameters
    ----------
    region_id : int or str
        Region number; coerced to int and zero-padded in file names.

    Returns
    -------
    list
        One decoded JSON payload per comuna.
    """
    print(f'START {region_id} [{time.ctime()}]')
    region_id = int(region_id)
    # Close the input file deterministically instead of leaking the
    # handle until GC (the original open(...).read() never closed it).
    with open('chalicelib/R%02d.csv' % region_id, 'r') as fh:
        comunas = fh.read().split()
    # Requests within a region are sequential; regions themselves run
    # in parallel via the thread pool driven from main().
    data = [requests.get(url_base % int(comuna)).json() for comuna in comunas]
    print(f'STOP {region_id} [{time.ctime()}]')
    # with-block guarantees the pickle file is flushed and closed.
    with open(f'data_{region_id}.pk', 'wb') as fh:
        pickle.dump(data, fh)
    return data  # list of per-comuna JSON payloads
async def main():
    """Scrape regions 1..16 concurrently.

    Each region is handled by get_comunas() running in a worker thread
    (requests is blocking, so threads let the downloads overlap); the
    coroutine completes when every region has finished. Returns None —
    get_comunas() persists its own results to disk.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        loop = asyncio.get_event_loop()
        futures = [
            loop.run_in_executor(executor, get_comunas, region_id)
            for region_id in range(1, 17)  # regions 1..16
        ]
        # Await all regions at once; the original iterated the gathered
        # results only to `pass`, so the loop was dead code.
        await asyncio.gather(*futures)
# Entry point: drive the async scraper to completion on the default loop.
asyncio.get_event_loop().run_until_complete(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment