Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robgon-art/1186da076dcc5cef6148716fc62c381a to your computer and use it in GitHub Desktop.
Save robgon-art/1186da076dcc5cef6148716fc62c381a to your computer and use it in GitHub Desktop.
Get Friends Summaries from Wikipedia
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape episode titles and summaries for Friends seasons 1-10 from the
# per-season Wikipedia pages, collect them in `df_friends` (indexed by an
# "sNNeNN" id), and write the result to "friends_summaries.csv".
records = []
for season in range(1, 11):
    url = "https://en.wikipedia.org/wiki/Friends_(season_" + str(season) + ")#Episodes"
    # A timeout keeps a stalled connection from hanging the script, and
    # raise_for_status() surfaces HTTP errors instead of parsing an error page.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")
    episode_list = soup.find(class_="wikitable plainrowheaders wikiepisodetable")
    if episode_list is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise RuntimeError(f"No episode table found at {url}")
    title_cells = episode_list.find_all(class_="summary")
    summary_cells = episode_list.find_all(class_="description")
    season_code = f"s{season:02d}"
    # Cells are paired positionally; the episode number is the 1-based
    # position of the pair within the season's table.
    for episode, (title_cell, summary_cell) in enumerate(
        zip(title_cells, summary_cells), start=1
    ):
        if season == 1 and episode == 1:
            # The first episode of season 1 gets a fixed title instead of the
            # scraped cell text.
            title = "The One Where It All Began"
        else:
            # Keep only the text before the first '""' separator, then drop
            # the remaining quote characters and any dagger marker.
            title = title_cell.get_text().strip().split('""')[0]
            title = title.replace('"', '').replace('†', '')
        # Only the first line of the summary cell is kept.
        summary = summary_cell.get_text().strip().split("\n")[0]
        episode_code = f"e{episode:02d}"
        records.append(
            {
                "id": season_code + episode_code,
                "title": title,
                "summary": summary,
                "meta.season": season_code,
                "meta.episode": episode_code,
            }
        )
# Build the frame once from the collected rows instead of enlarging it one
# cell at a time; the explicit column list keeps the layout when no rows exist.
df_friends = pd.DataFrame(
    records,
    columns=["id", "title", "summary", "meta.season", "meta.episode"],
).set_index("id")
df_friends.to_csv("friends_summaries.csv")
df_friends
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment