Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save afvanwoudenberg/e7fac4ebf85c0f814bd0f82ed28929dc to your computer and use it in GitHub Desktop.
Save afvanwoudenberg/e7fac4ebf85c0f814bd0f82ed28929dc to your computer and use it in GitHub Desktop.
Web scraping Wikipedia to create a mathematicians’ birthdays calendar
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "d9d5979f-c516-433a-8727-885b52c4b3df",
"metadata": {},
"source": [
"# Mathematicians' birthdays calendar\n",
"\n",
"Aswin van Woudenberg (https://www.aswinvanwoudenberg.com | https://github.com/afvanwoudenberg)"
]
},
{
"cell_type": "markdown",
"id": "cbd6444f-120f-4e44-ad4d-f4e3f8f8b7d5",
"metadata": {},
"source": [
"## Importing libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e5e0ce6e-7284-4932-8208-1ffb65c6b204",
"metadata": {},
"outputs": [],
"source": [
"import calendar\n",
"import json\n",
"import pickle\n",
"import re\n",
"from abc import ABC, abstractmethod\n",
"from datetime import date, datetime, timezone\n",
"\n",
"import pandas as pd\n",
"from icalendar import Calendar, Event, vRecur\n",
"from selenium import webdriver\n",
"from selenium.common.exceptions import NoSuchElementException\n",
"from selenium.webdriver.chrome.service import Service\n",
"from selenium.webdriver.common.by import By\n",
"from webdriver_manager.chrome import ChromeDriverManager"
]
},
{
"cell_type": "markdown",
"id": "8dcac65a-07af-41e6-a1fb-5dcd8e7346d2",
"metadata": {},
"source": [
"## Building the scraper"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d58daa20-72f6-40ef-8504-c74aa7276493",
"metadata": {},
"outputs": [],
"source": [
"class BaseSelector(ABC):\n",
" @abstractmethod\n",
" def scrape(self, scraper, parent, **kwargs):\n",
" pass"
]
},
{
"cell_type": "markdown",
"id": "58706c02-050e-4bd0-ad67-2a760aef6a5f",
"metadata": {},
"source": [
"### Scraping attributes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "44d83670-1bbb-4c62-b0c4-5d695445f56b",
"metadata": {},
"outputs": [],
"source": [
"class AttributeSelector(BaseSelector):\n",
"    \"\"\"Reads one attribute from the element(s) matched by a locator.\n",
"\n",
"    Args:\n",
"        id: key under which each attribute value is stored in the result dict.\n",
"        by: locator strategy passed to `find_element` / `find_elements`.\n",
"        path: locator string; `str.format(**kwargs)` is applied to it at scrape time.\n",
"        attribute: name handed to `get_attribute` on each matched element.\n",
"        multiple: when true, collect every match instead of a single one.\n",
"        default: value returned as-is (not wrapped in a dict) when a single\n",
"            lookup raises NoSuchElementException.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, id, by, path, attribute, multiple=False, default=None):\n",
"        self.id, self.by, self.path = id, by, path\n",
"        self.attribute = attribute\n",
"        self.multiple = multiple\n",
"        self.default = default\n",
"\n",
"    def scrape(self, scraper, parent, **kwargs):\n",
"        \"\"\"Return `{id: value}` for one match, or a list of such dicts when `multiple` is set.\"\"\"\n",
"        locator = self.path.format(**kwargs)\n",
"\n",
"        if self.multiple:\n",
"            matches = parent.find_elements(self.by, locator)\n",
"            return [{self.id: match.get_attribute(self.attribute)} for match in matches]\n",
"\n",
"        try:\n",
"            match = parent.find_element(self.by, locator)\n",
"            return {self.id: match.get_attribute(self.attribute)}\n",
"        except NoSuchElementException:\n",
"            return self.default"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "855688a8-f200-4f5c-947f-fa9964c5327b",
"metadata": {},
"outputs": [],
"source": [
"# For each list item that mentions 'mathematician' in a list located between the\n",
"# 'Births' and 'Deaths' headings, take the href of the first link whose text is\n",
"# longer than 4 characters.\n",
"urls_selector = AttributeSelector(\n",
"    id='url',\n",
"    by=By.XPATH,\n",
"    path=\"//*[@id='mw-content-text']/div/ul[preceding::h2/span[.='Births'] and following::h2/span[.='Deaths']]/li[contains(., 'mathematician')]/a[string-length(.)>4][1]\",\n",
"    attribute='href',\n",
"    multiple=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "82fac6cd-1ae9-4f4f-adad-d9c3d5bebb44",
"metadata": {},
"outputs": [],
"source": [
"driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9a5e53a0-c61c-45cb-bb51-c220227f3584",
"metadata": {},
"outputs": [],
"source": [
"driver.get(\"https://en.wikipedia.org/wiki/December_7\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3bbe306c-627f-4084-9fb6-70e43cb66e95",
"metadata": {},
"outputs": [],
"source": [
"urls = urls_selector.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b56a6ba0-9179-4a95-b833-f5ee661cc12d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'url': 'https://en.wikipedia.org/wiki/Leopold_Kronecker'},\n",
" {'url': 'https://en.wikipedia.org/wiki/Danilo_Blanu%C5%A1a'},\n",
" {'url': 'https://en.wikipedia.org/wiki/Mary_Ellen_Rudin'},\n",
" {'url': 'https://en.wikipedia.org/wiki/Nick_Katz'}]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"urls"
]
},
{
"cell_type": "markdown",
"id": "6738e5d7-b978-45de-bef1-523d551c5b3d",
"metadata": {},
"source": [
"### Scraping text"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3023dbb2-3918-44cf-a97a-36b527f65799",
"metadata": {},
"outputs": [],
"source": [
"class TextSelector(AttributeSelector):\n",
"    \"\"\"AttributeSelector preset that always reads the `innerText` attribute.\"\"\"\n",
"\n",
"    def __init__(self, id, by, path, multiple=False, default=None):\n",
"        super().__init__(\n",
"            id=id,\n",
"            by=by,\n",
"            path=path,\n",
"            attribute='innerText',\n",
"            multiple=multiple,\n",
"            default=default,\n",
"        )"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c8a52aa4-4595-4a1e-9306-2fe42f86e151",
"metadata": {},
"outputs": [],
"source": [
"# Text of every list item that mentions 'mathematician' in a list located\n",
"# between the 'Births' and 'Deaths' headings.\n",
"mathematician_selector = TextSelector(\n",
"    id='mathematician',\n",
"    by=By.XPATH,\n",
"    path=\"//*[@id='mw-content-text']/div/ul[preceding::h2/span[.='Births'] and following::h2/span[.='Deaths']]/li[contains(., 'mathematician')]\",\n",
"    multiple=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d9a704c3-0a14-4352-9569-5ebe2a7a9069",
"metadata": {},
"outputs": [],
"source": [
"mathematicians = mathematician_selector.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "11c8c5c3-49fc-41a5-8217-b67de7b3575c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'mathematician': '1823 – Leopold Kronecker, Polish-German mathematician and academic (d. 1891)'},\n",
" {'mathematician': '1903 – Danilo Blanuša, Croatian mathematician, physicist, and academic (d. 1987)'},\n",
" {'mathematician': '1924 – Mary Ellen Rudin, American mathematician (d. 2013)[16]'},\n",
" {'mathematician': '1943 – Nick Katz, American mathematician and academic'}]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mathematicians"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "81fa1292-78cb-49a0-9ebf-94711399861f",
"metadata": {},
"outputs": [],
"source": [
"# Text of the span inside the element with id 'firstHeading'.\n",
"month_day_selector = TextSelector(\n",
"    id='month_day',\n",
"    by=By.XPATH,\n",
"    path='//*[@id=\"firstHeading\"]/span',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "cc5ca3f3-7675-4ffb-95f0-6748b4ead92e",
"metadata": {},
"outputs": [],
"source": [
"month_day = month_day_selector.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "adb4004b-43e4-432e-ae42-84ee3fa9cad2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'month_day': 'December 7'}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"month_day"
]
},
{
"cell_type": "markdown",
"id": "94dd3461-d0d5-42ba-ae4e-a242c02462ed",
"metadata": {},
"source": [
"### Cleaning and transforming data"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "222a9766-05be-4c69-b70a-ed001dca5d46",
"metadata": {},
"outputs": [],
"source": [
"class MapSelector(BaseSelector):\n",
"    \"\"\"Applies a function to whatever a child selector scrapes.\n",
"\n",
"    Args:\n",
"        fun: callable applied to each scraped record.\n",
"        child: selector whose `scrape` result is transformed.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, fun, child):\n",
"        self.fun, self.child = fun, child\n",
"\n",
"    def scrape(self, scraper, parent, **kwargs):\n",
"        \"\"\"Return `fun(record)` for a single dict, otherwise a list with `fun` applied per item.\"\"\"\n",
"        scraped = self.child.scrape(scraper, parent, **kwargs)\n",
"        if type(scraped) != dict:\n",
"            # Anything that is not exactly a dict is iterated item by item.\n",
"            return [self.fun(item) for item in scraped]\n",
"        return self.fun(scraped)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "9060b95b-434f-4085-9d70-66befe09cc79",
"metadata": {},
"outputs": [],
"source": [
"# Turn a heading such as 'December 7' into {'day': 7, 'month': 12}:\n",
"# the second word is the day, the first word is looked up in calendar.month_name.\n",
"split_month_day_selector = MapSelector(\n",
"    fun=lambda m: dict(\n",
"        day=int(m['month_day'].split()[1]),\n",
"        month=list(calendar.month_name).index(m['month_day'].split()[0]),\n",
"    ),\n",
"    child=month_day_selector,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "4e338230-3da2-4604-a243-1239e6aedd1b",
"metadata": {},
"outputs": [],
"source": [
"split_month_day = split_month_day_selector.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f0029c39-ecdb-4050-a4bc-95208b7c84d3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'day': 7, 'month': 12}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"split_month_day"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "757edb21-f601-4f21-b5c5-6f19423deab5",
"metadata": {},
"outputs": [],
"source": [
"def unpack_mathematician_data(m):\n",
" data = re.sub(r'\\[[^\\]]*\\]', '', m['mathematician']) # remove any references\n",
" match = re.match(r'^(\\d+)\\s.*–\\s*(.*?),\\s*(.*?)$', data)\n",
"\n",
" return {\n",
" \"year\": int(match.group(1)),\n",
" \"name\": match.group(2),\n",
" \"bio\": match.group(3)\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "3c99aaef-ae5c-40de-a480-7c2784cac63c",
"metadata": {},
"outputs": [],
"source": [
"split_mathematicians_selector = MapSelector(unpack_mathematician_data, mathematician_selector)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7278a7c7-16d7-41ff-979d-653cca6c0170",
"metadata": {},
"outputs": [],
"source": [
"split_mathematicians = split_mathematicians_selector.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "93f400e4-ca82-40a5-a043-b2ff272b7ed9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'year': 1823,\n",
" 'name': 'Leopold Kronecker',\n",
" 'bio': 'Polish-German mathematician and academic (d. 1891)'},\n",
" {'year': 1903,\n",
" 'name': 'Danilo Blanuša',\n",
" 'bio': 'Croatian mathematician, physicist, and academic (d. 1987)'},\n",
" {'year': 1924,\n",
" 'name': 'Mary Ellen Rudin',\n",
" 'bio': 'American mathematician (d. 2013)'},\n",
" {'year': 1943,\n",
" 'name': 'Nick Katz',\n",
" 'bio': 'American mathematician and academic'}]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"split_mathematicians"
]
},
{
"cell_type": "markdown",
"id": "fdb87eb3-de50-4b28-a7c7-481dc128b873",
"metadata": {},
"source": [
"### Merging data"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "e23bdbc8-5d68-48e3-9543-68aaf5203aca",
"metadata": {},
"outputs": [],
"source": [
"class ZipSelector(BaseSelector):\n",
"    \"\"\"Merges the records produced by two selectors into combined dicts.\n",
"\n",
"    Args:\n",
"        left: selector whose result is the left operand of each `|` merge.\n",
"        right: selector whose result is the right operand of each `|` merge.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, left, right):\n",
"        self.left, self.right = left, right\n",
"\n",
"    def scrape(self, scraper, parent, **kwargs):\n",
"        \"\"\"Scrape both sides and merge them with the dict `|` operator.\n",
"\n",
"        A lone dict is merged into every item of a list on the other side; in every\n",
"        other case the two results are merged index by index, up to the shorter length.\n",
"        \"\"\"\n",
"        data_left = self.left.scrape(scraper, parent, **kwargs)\n",
"        data_right = self.right.scrape(scraper, parent, **kwargs)\n",
"\n",
"        if type(data_left) == dict:\n",
"            if type(data_right) == dict:\n",
"                return data_left | data_right\n",
"            if type(data_right) == list:\n",
"                return [data_left | item for item in data_right]\n",
"        elif type(data_left) == list and type(data_right) == dict:\n",
"            return [item | data_right for item in data_left]\n",
"\n",
"        pair_count = min(len(data_left), len(data_right))\n",
"        return [data_left[idx] | data_right[idx] for idx in range(pair_count)]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "02492623-3bd7-4bd3-a6ad-95b1a8ad2382",
"metadata": {},
"outputs": [],
"source": [
"zip_month_day_mathematicians_selector = ZipSelector(split_month_day_selector, split_mathematicians_selector)\n",
"zip_month_day_mathematicians_urls_selector = ZipSelector(zip_month_day_mathematicians_selector, urls_selector)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "3ae8514f-b058-48e8-9df8-77a7287e734d",
"metadata": {},
"outputs": [],
"source": [
"zip_month_day_mathematicians_urls = zip_month_day_mathematicians_urls_selector.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "7138e074-c972-4687-a861-b74e3b18c852",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'day': 7,\n",
" 'month': 12,\n",
" 'year': 1823,\n",
" 'name': 'Leopold Kronecker',\n",
" 'bio': 'Polish-German mathematician and academic (d. 1891)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Leopold_Kronecker'},\n",
" {'day': 7,\n",
" 'month': 12,\n",
" 'year': 1903,\n",
" 'name': 'Danilo Blanuša',\n",
" 'bio': 'Croatian mathematician, physicist, and academic (d. 1987)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Danilo_Blanu%C5%A1a'},\n",
" {'day': 7,\n",
" 'month': 12,\n",
" 'year': 1924,\n",
" 'name': 'Mary Ellen Rudin',\n",
" 'bio': 'American mathematician (d. 2013)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Mary_Ellen_Rudin'},\n",
" {'day': 7,\n",
" 'month': 12,\n",
" 'year': 1943,\n",
" 'name': 'Nick Katz',\n",
" 'bio': 'American mathematician and academic',\n",
" 'url': 'https://en.wikipedia.org/wiki/Nick_Katz'}]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"zip_month_day_mathematicians_urls"
]
},
{
"cell_type": "markdown",
"id": "6d399eec-03d1-418a-a40a-7fa2ff6cbdb2",
"metadata": {},
"source": [
"### Navigating pages"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "c4c6caeb-fc23-4209-9578-4f3c92e1642b",
"metadata": {},
"outputs": [],
"source": [
"class URLSelector(BaseSelector):\n",
"    \"\"\"Visits each URL in turn and gathers what a child selector scrapes there.\n",
"\n",
"    Args:\n",
"        urls: iterable of URL strings; `str.format(**kwargs)` is applied to each.\n",
"        child: selector run against the scraper after each page load.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, urls, child):\n",
"        self.urls, self.child = urls, child\n",
"\n",
"    def scrape(self, scraper, parent, **kwargs):\n",
"        \"\"\"Return one flat list of the records collected across all URLs.\n",
"\n",
"        `parent` is not used: after `scraper.get(...)` the scraper itself is\n",
"        handed to the child as its parent.\n",
"        \"\"\"\n",
"        collected = []\n",
"\n",
"        for template in self.urls:\n",
"            scraper.get(template.format(**kwargs))\n",
"            page_data = self.child.scrape(scraper, scraper, **kwargs)\n",
"\n",
"            kind = type(page_data)\n",
"            if kind == list:\n",
"                collected += page_data\n",
"            elif kind == dict:\n",
"                collected.append(page_data)\n",
"            # Any other result (e.g. None) is skipped.\n",
"\n",
"        return collected"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "d2aed30f-7cdc-4044-8c3e-c45c8bfdb7ff",
"metadata": {},
"outputs": [],
"source": [
"month_names = calendar.month_name[1:]\n",
"days_in_month = [calendar.monthrange(2020, month)[1] for month in range(1, 13)]\n",
"months_and_days = list(zip(month_names, days_in_month))\n",
"\n",
"wikipedia_urls = [f\"https://en.wikipedia.org/wiki/{month}_{day+1}\" for (month, days) in months_and_days for day in range(days)]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "45f96196-0770-4ab0-b4d1-5e4f81b90f27",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['https://en.wikipedia.org/wiki/January_1',\n",
" 'https://en.wikipedia.org/wiki/January_2',\n",
" 'https://en.wikipedia.org/wiki/January_3',\n",
" 'https://en.wikipedia.org/wiki/January_4',\n",
" 'https://en.wikipedia.org/wiki/January_5']"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wikipedia_urls[:5]"
]
},
{
"cell_type": "code",
"execution_count": 176,
"id": "213eb776-b9c9-4b94-b8d5-daee8c9067e7",
"metadata": {},
"outputs": [],
"source": [
"scraper = URLSelector(wikipedia_urls, zip_month_day_mathematicians_urls_selector)"
]
},
{
"cell_type": "markdown",
"id": "631c31d6-ee83-4c60-8128-147fc059d761",
"metadata": {},
"source": [
"## Running the scraper"
]
},
{
"cell_type": "code",
"execution_count": 177,
"id": "b431a578-b4c6-4e71-b83e-3123b9884025",
"metadata": {},
"outputs": [],
"source": [
"data = scraper.scrape(driver, driver)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "675b6f47-6a63-4875-999d-350e7f346c27",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'day': 1,\n",
" 'month': 1,\n",
" 'year': 1878,\n",
" 'name': 'Agner Krarup Erlang',\n",
" 'bio': 'Danish mathematician, statistician, and engineer (d. 1929)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Agner_Krarup_Erlang'},\n",
" {'day': 1,\n",
" 'month': 1,\n",
" 'year': 1894,\n",
" 'name': 'Satyendra Nath Bose',\n",
" 'bio': 'Indian physicist and mathematician (d. 1974)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Satyendra_Nath_Bose'},\n",
" {'day': 1,\n",
" 'month': 1,\n",
" 'year': 1905,\n",
" 'name': 'Stanisław Mazur',\n",
" 'bio': 'Ukrainian-Polish mathematician and theorist (d. 1981)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Stanis%C5%82aw_Mazur'},\n",
" {'day': 1,\n",
" 'month': 1,\n",
" 'year': 1912,\n",
" 'name': 'Boris Vladimirovich Gnedenko',\n",
" 'bio': 'Russian mathematician and historian (d. 1995)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Boris_Vladimirovich_Gnedenko'},\n",
" {'day': 2,\n",
" 'month': 1,\n",
" 'year': 1803,\n",
" 'name': 'Guglielmo Libri Carucci dalla Sommaja',\n",
" 'bio': 'Italian mathematician and academic (d. 1869)',\n",
" 'url': 'https://en.wikipedia.org/wiki/Guglielmo_Libri_Carucci_dalla_Sommaja'}]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:5]"
]
},
{
"cell_type": "code",
"execution_count": 179,
"id": "0275f08b-ae19-45f3-af58-62c93ed13bef",
"metadata": {},
"outputs": [],
"source": [
"# Serialise the scraped records and write them to data.json\n",
"serialised = json.dumps(data)\n",
"with open('data.json', 'w') as json_file:\n",
"    json_file.write(serialised)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "533a5a4f-56ad-4c5c-b842-43cc7c27e67e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" <th>name</th>\n",
" <th>bio</th>\n",
" <th>url</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1878</td>\n",
" <td>Agner Krarup Erlang</td>\n",
" <td>Danish mathematician, statistician, and engine...</td>\n",
" <td>https://en.wikipedia.org/wiki/Agner_Krarup_Erlang</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1894</td>\n",
" <td>Satyendra Nath Bose</td>\n",
" <td>Indian physicist and mathematician (d. 1974)</td>\n",
" <td>https://en.wikipedia.org/wiki/Satyendra_Nath_Bose</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1905</td>\n",
" <td>Stanisław Mazur</td>\n",
" <td>Ukrainian-Polish mathematician and theorist (d...</td>\n",
" <td>https://en.wikipedia.org/wiki/Stanis%C5%82aw_M...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1912</td>\n",
" <td>Boris Vladimirovich Gnedenko</td>\n",
" <td>Russian mathematician and historian (d. 1995)</td>\n",
" <td>https://en.wikipedia.org/wiki/Boris_Vladimirov...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1803</td>\n",
" <td>Guglielmo Libri Carucci dalla Sommaja</td>\n",
" <td>Italian mathematician and academic (d. 1869)</td>\n",
" <td>https://en.wikipedia.org/wiki/Guglielmo_Libri_...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>833</th>\n",
" <td>28</td>\n",
" <td>12</td>\n",
" <td>1950</td>\n",
" <td>Clifford Cocks</td>\n",
" <td>English mathematician and cryptographer</td>\n",
" <td>https://en.wikipedia.org/wiki/Clifford_Cocks</td>\n",
" </tr>\n",
" <tr>\n",
" <th>834</th>\n",
" <td>29</td>\n",
" <td>12</td>\n",
" <td>1856</td>\n",
" <td>Thomas Joannes Stieltjes</td>\n",
" <td>Dutch-French mathematician and academic (d. 1894)</td>\n",
" <td>https://en.wikipedia.org/wiki/Thomas_Joannes_S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>835</th>\n",
" <td>30</td>\n",
" <td>12</td>\n",
" <td>1944</td>\n",
" <td>Joseph Hilbe</td>\n",
" <td>American mathematician and philosopher (d. 2017)</td>\n",
" <td>https://en.wikipedia.org/wiki/Joseph_Hilbe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>836</th>\n",
" <td>31</td>\n",
" <td>12</td>\n",
" <td>1714</td>\n",
" <td>Arima Yoriyuki</td>\n",
" <td>Japanese mathematician and educator (d. 1783)</td>\n",
" <td>https://en.wikipedia.org/wiki/Arima_Yoriyuki</td>\n",
" </tr>\n",
" <tr>\n",
" <th>837</th>\n",
" <td>31</td>\n",
" <td>12</td>\n",
" <td>1952</td>\n",
" <td>Vaughan Jones</td>\n",
" <td>New Zealand mathematician and academic (d. 2020)</td>\n",
" <td>https://en.wikipedia.org/wiki/Vaughan_Jones</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>838 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" day month year name \\\n",
"0 1 1 1878 Agner Krarup Erlang \n",
"1 1 1 1894 Satyendra Nath Bose \n",
"2 1 1 1905 Stanisław Mazur \n",
"3 1 1 1912 Boris Vladimirovich Gnedenko \n",
"4 2 1 1803 Guglielmo Libri Carucci dalla Sommaja \n",
".. ... ... ... ... \n",
"833 28 12 1950 Clifford Cocks \n",
"834 29 12 1856 Thomas Joannes Stieltjes \n",
"835 30 12 1944 Joseph Hilbe \n",
"836 31 12 1714 Arima Yoriyuki \n",
"837 31 12 1952 Vaughan Jones \n",
"\n",
" bio \\\n",
"0 Danish mathematician, statistician, and engine... \n",
"1 Indian physicist and mathematician (d. 1974) \n",
"2 Ukrainian-Polish mathematician and theorist (d... \n",
"3 Russian mathematician and historian (d. 1995) \n",
"4 Italian mathematician and academic (d. 1869) \n",
".. ... \n",
"833 English mathematician and cryptographer \n",
"834 Dutch-French mathematician and academic (d. 1894) \n",
"835 American mathematician and philosopher (d. 2017) \n",
"836 Japanese mathematician and educator (d. 1783) \n",
"837 New Zealand mathematician and academic (d. 2020) \n",
"\n",
" url \n",
"0 https://en.wikipedia.org/wiki/Agner_Krarup_Erlang \n",
"1 https://en.wikipedia.org/wiki/Satyendra_Nath_Bose \n",
"2 https://en.wikipedia.org/wiki/Stanis%C5%82aw_M... \n",
"3 https://en.wikipedia.org/wiki/Boris_Vladimirov... \n",
"4 https://en.wikipedia.org/wiki/Guglielmo_Libri_... \n",
".. ... \n",
"833 https://en.wikipedia.org/wiki/Clifford_Cocks \n",
"834 https://en.wikipedia.org/wiki/Thomas_Joannes_S... \n",
"835 https://en.wikipedia.org/wiki/Joseph_Hilbe \n",
"836 https://en.wikipedia.org/wiki/Arima_Yoriyuki \n",
"837 https://en.wikipedia.org/wiki/Vaughan_Jones \n",
"\n",
"[838 rows x 6 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(data)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "26e52cf6-4831-4dc3-ad4d-378f1687699b",
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('data.csv', index=False)"
]
},
{
"cell_type": "markdown",
"id": "2cf831d6-5de9-4730-a479-99ef003924c6",
"metadata": {},
"source": [
"## Creating the calendar"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9abf5ef5-a6d4-467b-a5ea-ef23af7c7b4b",
"metadata": {},
"outputs": [],
"source": [
"# Create the calendar; PRODID and VERSION are required calendar properties (RFC 5545)\n",
"cal = Calendar()\n",
"cal.add('prodid', \"-//Mathematicians' birthdays calendar//EN\")\n",
"cal.add('version', '2.0')\n",
"\n",
"# Every event must carry a DTSTAMP; one shared creation time is sufficient\n",
"created_at = datetime.now(timezone.utc)\n",
"\n",
"# Iterate through the birthday data and add recurring events to the calendar\n",
"# NOTE(review): a plain yearly rule starting on February 29 may only recur in leap years - confirm this is acceptable\n",
"for person in data:\n",
"    name = person['name']\n",
"    birthdate = date(person['year'], person['month'], person['day'])\n",
"    bio = person['bio']\n",
"    url = person['url']\n",
"\n",
"    # Create an event for the birthday with recurrence rule\n",
"    event = Event()\n",
"    # UID is required on every event; it is built from the scraped fields so that\n",
"    # regenerating the file from the same data yields the same identifiers\n",
"    event.add('uid', f\"{birthdate.isoformat()}:{url}\")\n",
"    event.add('dtstamp', created_at)\n",
"    event.add('summary', f\"{name}'s birthday\")\n",
"    event.add('description', f\"{name} - {bio}\\n\\n{url}\")\n",
"    event.add('dtstart', birthdate)\n",
"    event.add('rrule', {'freq': 'yearly'})\n",
"    event.add('url', url)\n",
"    event.add('transp', 'TRANSPARENT') # Make events not show up as 'busy'\n",
"\n",
"    cal.add_component(event)\n",
"\n",
"# Save the calendar to a file\n",
"with open('mathematicians_birthdays_calendar.ics', 'wb') as ics_file:\n",
"    ics_file.write(cal.to_ical())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34488e12-e0cb-443e-82aa-8d982949520c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment