Skip to content

Instantly share code, notes, and snippets.

@vjvelascorios
Created May 9, 2024 05:07
Show Gist options
  • Save vjvelascorios/68fe2be381ac14a1bdcfcd116135a3e5 to your computer and use it in GitHub Desktop.
Save vjvelascorios/68fe2be381ac14a1bdcfcd116135a3e5 to your computer and use it in GitHub Desktop.
Download Banxico's Library monthly updates
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime
# Page that lists the monthly bulletins published by the Banco de México library
url = "https://www.banxico.org.mx/servicios/boletin-mensual-de-la-biblioteca-del-banco-de-mexi/boletin-mensual-biblioteca-pu.html"
# Local folder where the PDF files are stored
local_folder = "C:\\banxicos library updates"
# Seconds to wait on any HTTP request before giving up instead of hanging
REQUEST_TIMEOUT = 30


def bulletin_file_name(date_string):
    """Return the local file name for a bulletin dated *date_string*.

    Args:
        date_string: Date text taken from the listing, in ``dd/mm/yy`` form.
            Surrounding whitespace is ignored.

    Returns:
        A name of the form ``"Boletín Banxico YYYY.MM.DD.pdf"``.

    Raises:
        ValueError: If *date_string* does not match ``dd/mm/yy``.
    """
    date = datetime.strptime(date_string.strip(), '%d/%m/%y')
    return f"Boletín Banxico {date.strftime('%Y.%m.%d')}.pdf"


def download_pdf(pdf_url, local_path):
    """Download *pdf_url* into *local_path*.

    The request is made before any file is created, and the body is written
    to a temporary ``.part`` file that is renamed only once it is complete.
    A failed or interrupted download therefore never leaves an empty or
    truncated file at *local_path* that a later run would treat as already
    downloaded.

    Args:
        pdf_url: Absolute URL of the PDF.
        local_path: Destination path of the finished file.

    Returns:
        True if the file was downloaded and saved, False if the request
        failed (a message is printed in that case).
    """
    try:
        pdf_response = requests.get(pdf_url, timeout=REQUEST_TIMEOUT)
        # Reject 4xx/5xx answers so an error page is not saved as a PDF
        pdf_response.raise_for_status()
    except requests.RequestException as exc:
        print(f"No se pudo descargar {pdf_url}: {exc}")
        return False

    partial_path = local_path + ".part"
    with open(partial_path, 'wb') as f:
        f.write(pdf_response.content)
    os.replace(partial_path, local_path)
    return True


def main():
    """Download every bulletin listed on the page that is not stored locally.

    Each PDF link is expected to sit in a table row that also holds a
    ``td`` with class ``bmdateview`` containing the bulletin date; that date
    is used to build the local file name. Links without a usable date are
    reported and skipped so that one odd row does not abort the whole run.
    """
    # Create the local folder if it does not exist
    os.makedirs(local_folder, exist_ok=True)

    # Fetch the listing page
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        print("No se pudo acceder a la página web")
        return
    if response.status_code != 200:
        print("No se pudo acceder a la página web")
        return

    # Parse the HTML and collect every <a> that points to a PDF file
    soup = BeautifulSoup(response.content, 'html.parser')
    pdf_links = soup.find_all('a', href=lambda href: href and href.endswith('.pdf'))

    for link in pdf_links:
        pdf_url = urljoin(url, link['href'])

        # The date used for the file name lives in the same table row
        row = link.find_parent('tr')
        date_cell = row.find('td', class_='bmdateview') if row is not None else None
        if date_cell is None:
            print(f"Se omite el enlace sin fecha: {pdf_url}")
            continue
        try:
            new_file_name = bulletin_file_name(date_cell.text)
        except ValueError:
            print(f"Se omite el enlace con fecha no reconocida: {pdf_url}")
            continue

        # Download the file only if it is not already stored locally
        local_path = os.path.join(local_folder, new_file_name)
        if not os.path.exists(local_path) and download_pdf(pdf_url, local_path):
            print(f"Se ha descargado un nuevo archivo: {local_path}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment