Skip to content

Instantly share code, notes, and snippets.

@juanitobanca
Created October 14, 2019 02:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save juanitobanca/c991ce0e3488c96bf8c59f2a7408896f to your computer and use it in GitHub Desktop.
Save juanitobanca/c991ce0e3488c96bf8c59f2a7408896f to your computer and use it in GitHub Desktop.
from urllib.parse import parse_qs, urlsplit

import requests
from bs4 import BeautifulSoup
# Crawl the blog starting at its front page and follow the "older posts"
# pager link until no such link is found, collecting text from each page.
url = 'http://jaalamperez.blogspot.com/'

# Seconds to wait on each HTTP request before giving up, so that a stalled
# connection cannot hang the crawl indefinitely.
REQUEST_TIMEOUT = 30

# Inline ad snippet that is removed from the scraped text.
ADSENSE_SNIPPET = '(adsbygoogle = window.adsbygoogle || []).push({});'

# Accumulators for the scraped data.
fechas = []      # one entry per visited page, taken from the page URL
signos = []      # stripped text of every <h3> element
horoscopos = []  # whitespace-normalised text of every div.MsoNormal


def _fecha_de_url(page_url):
    """Return the first 10 characters of the URL's ``updated-max`` value.

    The query string is parsed instead of slicing at fixed offsets, so the
    result does not depend on the scheme, host length or parameter order.
    For ``http://jaalamperez.blogspot.com/search?updated-max=...`` this is
    the same text as ``page_url[51:61]``.  Returns ``''`` when the URL has
    no ``updated-max`` parameter (e.g. the start URL).

    NOTE(review): the value is presumably a timestamp whose first 10
    characters are a YYYY-MM-DD date -- confirm against real pager links.
    """
    valores = parse_qs(urlsplit(page_url).query).get('updated-max')
    return valores[0][:10] if valores else ''


while url:
    print(url)

    req = requests.get(url=url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(req.text, 'lxml')

    fechas.append(_fecha_de_url(url))

    for s in soup.find_all('h3'):
        signos.append(s.text.strip())

    for h in soup.find_all('div', {'class': 'MsoNormal'}):
        # Drop the ad snippet first, then collapse every run of whitespace
        # (including leading/trailing) into single spaces.
        texto = h.text.replace(ADSENSE_SNIPPET, '')
        horoscopos.append(' '.join(texto.split()))

    # Move on to the next page; the loop ends when there is no pager link.
    older = soup.find('a', {'class': 'blog-pager-older-link'})
    url = older.attrs['href'] if older is not None else None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment