Created
April 28, 2018 21:42
-
-
Save hvent90/8ee66f19e29bc4d2ac3ac983a0c68e49 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup
def getArticles(soup):
    """Return every ``item`` element found in the parsed feed.

    Args:
        soup: Parsed document exposing ``find_all`` (the script below
            passes a ``BeautifulSoup`` object).

    Returns:
        The result of ``soup.find_all('item')``, unchanged.
    """
    return soup.find_all('item')
def getHeadline(soup):
    """Return the string of the first ``title`` tag under *soup*.

    Args:
        soup: Parsed element exposing ``find`` (the script below passes
            one ``item`` element at a time).

    Returns:
        The ``.string`` attribute of the first ``title`` tag, or ``None``
        when the element has no ``title`` tag at all.
    """
    title = soup.find('title')
    # find() yields None for a missing tag; guard so a single malformed
    # entry does not abort the whole run with an AttributeError.
    if title is None:
        return None
    return title.string
def getArticleUrl(soup):
    """Return the string of the first ``guid`` tag under *soup*.

    The script below prints this value as the article's URL.

    Args:
        soup: Parsed element exposing ``find`` (the script below passes
            one ``item`` element at a time).

    Returns:
        The ``.string`` attribute of the first ``guid`` tag, or ``None``
        when the element has no ``guid`` tag at all.
    """
    guid = soup.find('guid')
    # find() yields None for a missing tag; guard so a single malformed
    # entry does not abort the whole run with an AttributeError.
    if guid is None:
        return None
    return guid.string
# URL of the Ars Technica feed to read
quote_page = 'http://feeds.arstechnica.com/arstechnica/index'

# Fetch the feed. The response object holds an open connection, so wrap it
# in closing() to make sure it is released even if parsing raises.
with closing(urllib2.urlopen(quote_page)) as page:
    # Parse the response with Python's built-in 'html.parser' backend and
    # keep the resulting tree in 'soup'.
    soup = BeautifulSoup(page, 'html.parser')

# Get all articles (one 'item' element each) and print, for every article,
# its headline followed by its URL.
articles = getArticles(soup)
for article in articles:
    print(getHeadline(article))
    print(getArticleUrl(article))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment