Created
November 29, 2022 23:09
-
-
Save j6k4m8/47ddaf5ebbd85d0a3b8b8724ba24fd66 to your computer and use it in GitHub Desktop.
Was Olivia Colman in a lot more comedies before?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import requests\n", | |
"import matplotlib.pyplot as plt\n", | |
"from tqdm.auto import tqdm\n", | |
"from bs4 import BeautifulSoup\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"con = requests.get(\"https://www.imdb.com/name/nm1469236/?ref_=rg_mv_close\").content\n", | |
"soup = BeautifulSoup(con, \"html.parser\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rows = soup.find_all(\"div\", {\"class\":\"filmo-row\"})\n", | |
"years = []\n", | |
"work = []\n", | |
"work_link = []\n", | |
"for row in rows:\n", | |
" year = row.find(\"span\", {\"class\":\"year_column\"})\n", | |
" years.append(year.text)\n", | |
" work.append(row.find(\"b\").text)\n", | |
" work_link.append(row.find(\"a\").get(\"href\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"work_genres = []\n", | |
"for link in tqdm(work_link):\n", | |
" con = requests.get(\n", | |
" \"https://www.imdb.com\" + link,\n", | |
" headers={\n", | |
" \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36\"\n", | |
" },\n", | |
" ).content\n", | |
" soup = BeautifulSoup(con, \"html.parser\")\n", | |
" script = soup.find(\"script\", {\"type\":\"application/ld+json\"})\n", | |
" if script is None:\n", | |
" continue\n", | |
" jsonparsed = json.loads(script.text)\n", | |
" work_genres.append(jsonparsed.get(\"genre\", []))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"df = pd.DataFrame({\n", | |
" \"year_total\": years[:len(work_genres)],\n", | |
" # Remove space, and take first year before hyphen\n", | |
" \"year_parsed\": [int(y.strip()[:4]) if y.strip() else None for y in years[:len(work_genres)]],\n", | |
" \"work\": work[:len(work_genres)],\n", | |
" \"has_comedy\": [1 if \"Comedy\" in genre else 0 for genre in work_genres],\n", | |
" \"has_drama\": [1 if \"Drama\" in genre else 0 for genre in work_genres], \n", | |
"})\n", | |
"dfg = df[\n", | |
" df.year_parsed > 1900\n", | |
"].groupby(\"year_parsed\").agg({\n", | |
" \"has_comedy\": \"sum\",\n", | |
" \"has_drama\": \"sum\",\n", | |
"}).reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with plt.style.context(\"seaborn\"):\n", | |
" fig, ax = plt.subplots(figsize=(8, 6), dpi=150)\n", | |
" comline = ax.plot(\n", | |
" dfg.year_parsed, dfg.has_comedy, label=\"Comedy\", alpha=0.25, linewidth=5\n", | |
" )\n", | |
" comcolor = comline[0].get_color()\n", | |
" # Moving average\n", | |
" ax.plot(\n", | |
" dfg.year_parsed,\n", | |
" dfg.has_comedy.rolling(5).mean(),\n", | |
" label=\"Comedy (5yr avg)\",\n", | |
" color=comcolor,\n", | |
" linestyle=\"--\",\n", | |
" )\n", | |
"\n", | |
" draline = ax.plot(\n", | |
" dfg.year_parsed, dfg.has_drama, label=\"Drama\", alpha=0.25, linewidth=5\n", | |
" )\n", | |
" dracolor = draline[0].get_color()\n", | |
" # Moving average\n", | |
" ax.plot(\n", | |
" dfg.year_parsed,\n", | |
" dfg.has_drama.rolling(5).mean(),\n", | |
" label=\"Drama (5yr avg)\",\n", | |
" color=dracolor,\n", | |
" linestyle=\"--\",\n", | |
" )\n", | |
" ax.legend()\n", | |
" ax.set_xlabel(\"Year\")\n", | |
" ax.set_ylabel(\"Number of movies\")\n", | |
" ax.set_title(\"Number of Olivia Colman movies by genre\")\n", | |
" plt.show()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.9.7 64-bit ('scripting')", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.7" | |
}, | |
"vscode": { | |
"interpreter": { | |
"hash": "410f6db90cc89b666adbd1b755ae7555dd227a2d7c11822f3d377845b87672a4" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Author
j6k4m8
commented
Nov 29, 2022
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment