Skip to content

Instantly share code, notes, and snippets.

@wcarhart
Created March 29, 2019 16:44
Show Gist options
  • Save wcarhart/0fbae5b498b2102ca1069906039131f8 to your computer and use it in GitHub Desktop.
Save wcarhart/0fbae5b498b2102ca1069906039131f8 to your computer and use it in GitHub Desktop.
Get the NYT headlines for the day!
import requests, re
from bs4 import BeautifulSoup
def get_nyt_headlines():
    """Print a numbered list of headlines scraped from https://www.nytimes.com/.

    The page is downloaded and parsed with BeautifulSoup's ``html.parser``.
    For every ``<h2>`` element, the text of the first ``<span>`` it contains
    is taken as one headline; ``<h2>`` elements without a ``<span>`` (or whose
    first ``<span>`` holds no text) are skipped. Headlines are written to
    stdout as ``"1. <headline>"``, ``"2. <headline>"``, ... in document order.

    Returns:
        None. The headlines are only printed.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with a 4xx/5xx status.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.get('https://www.nytimes.com/', timeout=10)
    # Fail loudly instead of scraping the <h2> tags of an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    headlines = []
    for heading in soup.find_all('h2'):
        # find() returns the first descendant <span>, or None when absent.
        span = heading.find('span')
        if span is None:
            continue
        # Read the text from the parsed node rather than regex-matching its
        # serialized HTML: a literal "<span>" pattern misses any span that
        # carries attributes, cuts nested spans short, and leaves entities
        # such as "&amp;" escaped.
        text = span.get_text(strip=True)
        if text:
            headlines.append(text)

    for index, headline in enumerate(headlines, start=1):
        print("{}. {}".format(index, headline))
# Script entry point: fetch and print the headlines only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    get_nyt_headlines()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment