Skip to content

Instantly share code, notes, and snippets.

@aVolpe

aVolpe/scraper.py

Last active Aug 18, 2020
Embed
What would you like to do?
Extractor de información histórica de cotizaciones en Paraguay
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Dependencias:
# - beautifulsoup4
import contextlib
import sys
import time
import urllib
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
def get_file(anho, mes, timeout=30):
    """Download one month of BCP exchange rates and return them as CSV text.

    POSTs the year and month to the BCP monthly-quotation page, parses the
    HTML table whose id is ``cotizacion-interbancaria`` and emits one line
    per data row in the form ``year,month,<cell>,...,<cell>,<timestamp>``.

    Args:
        anho: Year to query; rendered with ``str()``.
        mes: Month number as an int; values below 10 are zero-padded.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A string with one newline-terminated line per table row that has
        ``<td>`` cells. Rows without ``<td>`` cells and rows whose first
        cell starts with ``(`` are skipped. The string is empty when no
        row qualifies.

    Raises:
        ValueError: If the response does not contain the expected table.
        OSError: Network failures and timeouts propagate from ``urlopen``
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    s_anho = str(anho)
    s_mes = str(mes) if mes >= 10 else '0' + str(mes)

    data = urllib.parse.urlencode({'anho': s_anho, 'mes': s_mes}).encode('UTF-8')
    req = urllib.request.Request(
        "https://www.bcp.gov.py/webapps/web/cotizacion/monedas-mensual",
        data,
        # NOTE(review): custom User-Agent kept from the original; presumably
        # the site rejects urllib's default one -- confirm.
        headers={'User-Agent': "Magic Browser"},
    )
    # The response object is itself a context manager, so no closing() wrapper
    # is needed; the timeout keeps a stalled server from hanging the scrape.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        result = response.read()

    soup = BeautifulSoup(result, 'html.parser')
    tabla_paraguay = soup.find('table', {"id": "cotizacion-interbancaria"})
    if tabla_paraguay is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on None further down.
        raise ValueError(
            "No se encontró la tabla 'cotizacion-interbancaria' para "
            + s_anho + '-' + s_mes
        )

    # Retrieval timestamp in the locale's date/time format, computed once so
    # every row from the same download carries the same value.
    stamp = time.strftime('%x %X')

    lines = []
    for row in tabla_paraguay.find_all("tr"):
        # Strip asterisks and surrounding whitespace from every cell.
        fields = [
            cell.get_text().replace('*', '').strip()
            for cell in row.find_all("td")
        ]
        # Rows with no <td> cells carry no data; rows starting with "(" are
        # skipped as in the original (presumably footnotes -- confirm).
        if not fields or fields[0].startswith('('):
            continue
        lines.append(','.join([s_anho, s_mes, *fields, stamp]) + '\n')
    return ''.join(lines)
# print get_file(2001, 1)
# Scrape every month from January 2001 through December 2019 and print the
# resulting CSV lines to stdout. Diagnostics go to stderr so they do not mix
# with the data, and the run stops at the first failure.
for i in range(2001, 2020):
    for j in range(1, 13):
        try:
            print(get_file(i, j))
            # Pause between requests to avoid hammering the server.
            time.sleep(2)
        except Exception as error:
            print('error', error, file=sys.stderr)
            print('Error en ' + str(i) + ' mes ' + str(j), file=sys.stderr)
            # Exit with a non-zero status so callers (shell, cron) can tell
            # that the scrape did not complete.
            sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.