Last active
March 10, 2019 16:12
-
-
Save luciano-buono/331c390182f7c2a30efa2ffd18cbc805 to your computer and use it in GitHub Desktop.
Obtener títulos de artículos desde la página de Clarín
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import the requests Python library for programmatically making HTTP requests
# after installing it according to these instructions:
# http://docs.python-requests.org/en/latest/user/install/#install
import requests | |
import json | |
# import the BeautifulSoup Python library according to these instructions:
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
# use this syntax as described on the documentation page:
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#making-the-soup
from bs4 import BeautifulSoup | |
# Fetch the Clarin homepage and print the text of every <p>/<h1>/<h2>/<h3>
# element whose CSS class list includes "volanta".
# NOTE(review): the gist description says these are the article titles;
# confirm against the live page markup.

# URL of the Clarin homepage to fetch and parse
base_url = 'https://www.clarin.com/'

# Download the page. requests applies no timeout unless one is passed, so an
# explicit value keeps the script from hanging forever on an unresponsive server.
r = requests.get(base_url, timeout=30)
# Show the response object (e.g. "<Response [200]>") as a quick status check
print(r)

# Parse the HTML text of the response with Python's built-in HTML parser
soup = BeautifulSoup(r.text, "html.parser")

# Let BeautifulSoup filter on the class directly: elements without a "class"
# attribute are simply not returned, so there is no need to trigger and
# swallow a TypeError for each of them.
# The KeyboardInterrupt handler wraps the whole loop so that Ctrl-C actually
# stops the output instead of only skipping the current element.
try:
    for volanta in soup.find_all(['p', 'h1', 'h2', 'h3'], class_='volanta'):
        print(volanta.text)
except KeyboardInterrupt:
    print("\nThey are sentinent\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment