Created
February 22, 2020 20:15
-
-
Save jnrdrgz/b9f45430b71e8a298444e23a64b44aa7 to your computer and use it in GitHub Desktop.
Ver noticias de lagaceta.com.ar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print the body text of a lagaceta.com.ar news article."""
import requests
import bs4
import re

URL_GACETA = ""  # link to the news article

# Fetch the article page. The timeout keeps the script from hanging forever
# if the server never answers.
r = requests.get(URL_GACETA, timeout=10)
if r.status_code == 200:
    s = bs4.BeautifulSoup(r.text, "html.parser")
    # Look up the element with class "newsBody"; presumably there is one per
    # article page.
    body = s.find(class_="newsBody")
    if body is None:
        # find() returns None when nothing matches; report it instead of
        # crashing with an AttributeError on .getText().
        print("Error: newsBody not found")
    else:
        noticia = body.getText()
        # Drop any literal <p> / </p> markers left in the extracted text.
        noticia = noticia.replace("<p>", "").replace("</p>", "")
        # Also drop the leftover Google snippet that ends up in the middle of
        # the article text.
        noticia = re.sub(r"google.+;", "", noticia)
        print(noticia)
else:
    print("Error {}".format(r.status_code))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Single-expression variant: print the body text of a lagaceta.com.ar article."""
import requests
import bs4
import re

URL_GACETA = ""  # link to the news article

# Same pipeline as a step-by-step script, kept as one expression:
# fetch -> parse -> take the "newsBody" element's text -> drop literal
# <p>/</p> markers -> drop the leftover Google snippet -> print.
# NOTE(review): no status-code or missing-element handling here; a page
# without a "newsBody" element raises AttributeError.
print(
    re.sub(
        r"google.+;",
        "",
        bs4.BeautifulSoup(requests.get(URL_GACETA, timeout=10).text, "html.parser")
        .find(class_="newsBody")
        .getText()
        .replace("<p>", "")
        .replace("</p>", ""),
    )
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment