Skip to content

Instantly share code, notes, and snippets.

@j6k4m8
Created November 29, 2022 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save j6k4m8/47ddaf5ebbd85d0a3b8b8724ba24fd66 to your computer and use it in GitHub Desktop.
Save j6k4m8/47ddaf5ebbd85d0a3b8b8724ba24fd66 to your computer and use it in GitHub Desktop.
Was Olivia Colman in a lot more comedies before?
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import requests\n",
"import matplotlib.pyplot as plt\n",
"from tqdm.auto import tqdm\n",
"from bs4 import BeautifulSoup\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download the IMDb name page whose filmography rows are parsed in the next cell.\n",
"# A browser-like User-Agent is sent, matching the per-title requests made later\n",
"# in this notebook (presumably IMDb rejects the default python-requests agent --\n",
"# TODO confirm). The timeout keeps a stalled connection from hanging the kernel.\n",
"response = requests.get(\n",
"    \"https://www.imdb.com/name/nm1469236/?ref_=rg_mv_close\",\n",
"    headers={\n",
"        \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36\"\n",
"    },\n",
"    timeout=30,\n",
")\n",
"# Fail loudly on an HTTP error instead of parsing an error page into zero rows.\n",
"response.raise_for_status()\n",
"con = response.content\n",
"soup = BeautifulSoup(con, \"html.parser\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Every <div class=\"filmo-row\"> is treated as one credit. Collect its year\n",
"# label, bold text and first link target into three index-aligned lists.\n",
"rows = soup.find_all(\"div\", {\"class\": \"filmo-row\"})\n",
"years = [\n",
"    entry.find(\"span\", {\"class\": \"year_column\"}).text for entry in rows\n",
"]\n",
"work = [entry.find(\"b\").text for entry in rows]\n",
"work_link = [entry.find(\"a\").get(\"href\") for entry in rows]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visit each credit's page and read its genres from the JSON-LD metadata block.\n",
"# work_genres must stay index-aligned with years / work / work_link: the\n",
"# DataFrame cell pairs them by position (years[:len(work_genres)]), so a page\n",
"# without metadata still contributes an empty entry instead of being skipped.\n",
"request_headers = {\n",
"    \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36\"\n",
"}\n",
"work_genres = []\n",
"for link in tqdm(work_link):\n",
"    # Dedicated names so the filmography `con` / `soup` are not overwritten.\n",
"    title_html = requests.get(\n",
"        \"https://www.imdb.com\" + link,\n",
"        headers=request_headers,\n",
"        timeout=30,  # do not let one stalled request hang the whole crawl\n",
"    ).content\n",
"    title_soup = BeautifulSoup(title_html, \"html.parser\")\n",
"    script = title_soup.find(\"script\", {\"type\": \"application/ld+json\"})\n",
"    if script is None:\n",
"        # No metadata block: record an empty genre list to keep positions aligned.\n",
"        work_genres.append([])\n",
"        continue\n",
"    jsonparsed = json.loads(script.text)\n",
"    work_genres.append(jsonparsed.get(\"genre\", []))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One row per scraped credit with 0/1 Comedy and Drama flags, then the flags\n",
"# are summed per parsed year.\n",
"n_scraped = len(work_genres)\n",
"year_labels = years[:n_scraped]\n",
"df = pd.DataFrame({\n",
"    \"year_total\": year_labels,\n",
"    # Strip whitespace and keep the first four characters (the year before any hyphen)\n",
"    \"year_parsed\": [\n",
"        int(label.strip()[:4]) if label.strip() else None\n",
"        for label in year_labels\n",
"    ],\n",
"    \"work\": work[:n_scraped],\n",
"    \"has_comedy\": [int(\"Comedy\" in genre) for genre in work_genres],\n",
"    \"has_drama\": [int(\"Drama\" in genre) for genre in work_genres],\n",
"})\n",
"# Keep only rows whose parsed year is after 1900.\n",
"dated = df.loc[df.year_parsed > 1900]\n",
"dfg = (\n",
"    dated.groupby(\"year_parsed\")[[\"has_comedy\", \"has_drama\"]]\n",
"    .sum()\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Matplotlib 3.6 renamed its bundled seaborn styles to \"seaborn-v0_8\" and later\n",
"# releases dropped the old \"seaborn\" name; use whichever this install provides.\n",
"style_name = \"seaborn-v0_8\" if \"seaborn-v0_8\" in plt.style.available else \"seaborn\"\n",
"with plt.style.context(style_name):\n",
"    fig, ax = plt.subplots(figsize=(8, 6), dpi=150)\n",
"    for label, column in ((\"Comedy\", \"has_comedy\"), (\"Drama\", \"has_drama\")):\n",
"        # Per-year counts drawn as a wide, faint line.\n",
"        (raw_line,) = ax.plot(\n",
"            dfg.year_parsed, dfg[column], label=label, alpha=0.25, linewidth=5\n",
"        )\n",
"        # Moving average over 5 consecutive rows of dfg, dashed, in the same colour.\n",
"        # NOTE(review): a year with no credits has no row in dfg, so the window\n",
"        # can span more than 5 calendar years -- confirm this is acceptable.\n",
"        ax.plot(\n",
"            dfg.year_parsed,\n",
"            dfg[column].rolling(5).mean(),\n",
"            label=f\"{label} (5yr avg)\",\n",
"            color=raw_line.get_color(),\n",
"            linestyle=\"--\",\n",
"        )\n",
"    ax.legend()\n",
"    ax.set_xlabel(\"Year\")\n",
"    ax.set_ylabel(\"Number of movies\")\n",
"    ax.set_title(\"Number of Olivia Colman movies by genre\")\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit ('scripting')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"vscode": {
"interpreter": {
"hash": "410f6db90cc89b666adbd1b755ae7555dd227a2d7c11822f3d377845b87672a4"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@j6k4m8
Copy link
Author

j6k4m8 commented Nov 29, 2022

c4a32558-d183-43cf-839f-d3c0f49d0b72

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment