Skip to content

Instantly share code, notes, and snippets.

@jnrdrgz
Created February 22, 2020 20:15
Show Gist options
  • Save jnrdrgz/b9f45430b71e8a298444e23a64b44aa7 to your computer and use it in GitHub Desktop.
Save jnrdrgz/b9f45430b71e8a298444e23a64b44aa7 to your computer and use it in GitHub Desktop.
Ver noticias de lagaceta.com.ar
import requests
import bs4
import re
URL_GACETA = ""  # link to the news article


def clean_article_text(text):
    """Strip leftover markup and script residue from extracted article text.

    Args:
        text: Text taken from the article body element.

    Returns:
        ``text`` with literal ``<p>`` / ``</p>`` markers removed and every
        ``google...;`` fragment deleted.
    """
    text = text.replace("<p>", "").replace("</p>", "")
    # A Google script fragment is left dangling in the middle of the article
    # text. "." does not match newlines, so each match stays on one line and
    # runs from "google" to the last ";" on that line.
    return re.sub(r"google.+;", "", text)


def extract_article(html):
    """Return the cleaned text of the ``newsBody`` element.

    Args:
        html: HTML source of the article page.

    Returns:
        The cleaned article text, or ``None`` when the page has no element
        with class ``newsBody``.
    """
    soup = bs4.BeautifulSoup(html, "html.parser")
    # find() returns the first match; the original author assumed there is
    # one "newsBody" element per article.
    body = soup.find(class_="newsBody")
    if body is None:
        return None
    return clean_article_text(body.getText())


def main():
    """Download ``URL_GACETA`` and print the article text or an error line."""
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(URL_GACETA, timeout=10)
    if response.status_code != 200:
        print("Error {}".format(response.status_code))
        return

    noticia = extract_article(response.text)
    if noticia is None:
        # The page loaded but does not have the expected article container.
        print("Error: falta el elemento 'newsBody'")
        return
    print(noticia)


if __name__ == "__main__":
    main()
import requests
import bs4
import re
URL_GACETA = ""  # link to the news article

# Fetch the page, take the text of the first element with class "newsBody",
# drop any literal <p>/</p> markers and "google...;" fragments, then print
# the result.
response = requests.get(URL_GACETA)
soup = bs4.BeautifulSoup(response.text, "html.parser")
article_text = soup.find(class_="newsBody").getText()
article_text = article_text.replace("<p>", "")
article_text = article_text.replace("</p>", "")
print(re.sub(r"google.+;", "", article_text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment