
@luciano-buono
Last active March 10, 2019 16:12
Get article titles from the Clarin homepage
# import the requests Python library for programmatically making HTTP requests
# after installing it according to these instructions:
# http://docs.python-requests.org/en/latest/user/install/#install
import requests
import json
# import the BeautifulSoup Python library according to these instructions:
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
# use this syntax as described on the documentation page:
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#making-the-soup
from bs4 import BeautifulSoup
# the URL of the Clarin website we want to parse
base_url = 'https://www.clarin.com/'
# the syntax (according to the documentation) for how to
# "load" a webpage through Python
r = requests.get(base_url)
print(r)
# decode the text of the HTML of the Clarin homepage;
# r comes from the requests call above
soup = BeautifulSoup(r.text,"html.parser")
# find and loop through all elements on the page whose
# class list contains "volanta" (Clarin's kicker class)
# earlier attempts, kept commented out for reference:
#for link in soup.find_all('a'):
#    print(link.get('href'))
#for volanta in (soup.find_all(class_="summary") or soup.find_all(class_="volanta") or soup.find_all(['h1', 'h2', 'h3'])):
#    print(volanta.text, "\n")
for volanta in soup.find_all(['p', 'h1', 'h2', 'h3']):
    try:
        # attrs.get('class') returns None when the tag has no class
        # attribute, which makes the `in` test below raise TypeError
        if 'volanta' in volanta.attrs.get('class'):
            print(volanta.text)
    except TypeError:
        pass
    except KeyboardInterrupt:
        print("\nThey are sentient\n")
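The try/except dance above exists only because `attrs.get('class')` returns `None` for tags without a class. A sketch of a more direct alternative, using Beautiful Soup's CSS-selector support on a small static HTML sample (the markup here is hypothetical, standing in for the live Clarin page, so it runs without network access):

```python
from bs4 import BeautifulSoup

# static HTML standing in for the Clarin homepage (hypothetical markup,
# reusing the "volanta" class name the script above looks for)
html = """
<html><body>
  <p class="volanta">Politica</p>
  <h2 class="volanta">Economia</h2>
  <p class="summary">not a volanta</p>
  <h1>plain headline</h1>
</body></html>
"""

soup = BeautifulSoup(html, "html.parser")

# "p.volanta, h1.volanta, ..." matches only tags that actually carry
# the "volanta" class, so no TypeError handling is needed
volantas = [tag.get_text(strip=True)
            for tag in soup.select("p.volanta, h1.volanta, h2.volanta, h3.volanta")]
print(volantas)  # → ['Politica', 'Economia']
```

Against the live site you would build `soup` from `requests.get(base_url).text` as the script does, and the same `soup.select(...)` call applies.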