Skip to content

Instantly share code, notes, and snippets.

@lgaud
Last active November 20, 2023 09:29
Show Gist options
  • Save lgaud/d742d94cd653f205ecbe4d18fefc8ec2 to your computer and use it in GitHub Desktop.
Save lgaud/d742d94cd653f205ecbe4d18fefc8ec2 to your computer and use it in GitHub Desktop.
This is an example of importing a Notion database into Pandas, created for a blog post at https://www.pynotion.com/load-in-pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is an example of importing a Notion database into Pandas, created for a blog post at https://www.pynotion.com/load-in-pandas\n",
"\n",
"Instructions to run:\n",
"1. Set up a Notion Integration and set the key to the NOTION_KEY environment variable.\n",
"2. Get the ID for a Notion database you'd like to load in Pandas and set notion_database_id in the cell below. \n",
"   - The data analysis part of this sample uses a dataset of [TED Talks from Kaggle](https://www.kaggle.com/datasets/miguelcorraljr/ted-talks-2022), which was imported into Notion from the CSV file to demonstrate analysis on a larger dataset. It has over 5,000 rows, so it takes a couple of minutes to load."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Set your database id here:\n",
"notion_database_id = \"312c0bbdea67435b87761098c6a24f47\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from urllib.parse import urljoin\n",
"\n",
"class NotionClient():\n",
"    \"\"\"Small wrapper around the Notion REST API that reuses one requests session.\n",
"\n",
"    Only the database query endpoint is exposed.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, notion_key, timeout=30):\n",
"        \"\"\"Create an authenticated session.\n",
"\n",
"        Args:\n",
"            notion_key: Secret key of the Notion integration.\n",
"            timeout: Seconds to wait on each HTTP request before giving up.\n",
"\n",
"        Raises:\n",
"            ValueError: If notion_key is empty or None, e.g. because the\n",
"                NOTION_KEY environment variable is not set.\n",
"        \"\"\"\n",
"        # Fail fast: otherwise a missing key is sent as \"Bearer None\" and only\n",
"        # shows up later as a rejected request.\n",
"        if not notion_key:\n",
"            raise ValueError(\"notion_key is empty; set the NOTION_KEY environment variable to your Notion integration key\")\n",
"        self.notion_key = notion_key\n",
"        self.timeout = timeout\n",
"        self.default_headers = {'Authorization': f\"Bearer {self.notion_key}\",\n",
"                                'Content-Type': 'application/json', 'Notion-Version': '2022-06-28'}\n",
"        self.session = requests.Session()\n",
"        self.session.headers.update(self.default_headers)\n",
"        self.NOTION_BASE_URL = \"https://api.notion.com/v1/\"\n",
"\n",
"    def query_database(self, db_id, filter_object=None, sorts=None, start_cursor=None, page_size=None):\n",
"        \"\"\"POST a query against one database and return the raw HTTP response.\n",
"\n",
"        Args:\n",
"            db_id: ID of the database to query.\n",
"            filter_object: Optional value sent as the \"filter\" field.\n",
"            sorts: Optional value sent as the \"sorts\" field.\n",
"            start_cursor: Optional value sent as the \"start_cursor\" field.\n",
"            page_size: Optional value sent as the \"page_size\" field.\n",
"\n",
"        Returns:\n",
"            The response object from the session; the caller checks its status\n",
"            and decodes the JSON body.\n",
"        \"\"\"\n",
"        db_url = urljoin(self.NOTION_BASE_URL, f\"databases/{db_id}/query\")\n",
"        optional_fields = {\n",
"            \"filter\": filter_object,\n",
"            \"sorts\": sorts,\n",
"            \"start_cursor\": start_cursor,\n",
"            \"page_size\": page_size,\n",
"        }\n",
"        # Only include the fields the caller actually supplied.\n",
"        params = {key: value for key, value in optional_fields.items() if value is not None}\n",
"\n",
"        # An explicit timeout keeps a stalled connection from blocking forever.\n",
"        return self.session.post(db_url, json=params, headers=self.default_headers, timeout=self.timeout)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"class PandasConverter():\n",
"    \"\"\"Flattens Notion database query results into plain dicts for pandas.\"\"\"\n",
"\n",
"    # Property types whose payload is a list of text fragments.\n",
"    text_types = [\"rich_text\", \"title\"]\n",
"\n",
"    def response_to_records(self, db_response):\n",
"        \"\"\"Return one record dict per entry in db_response[\"results\"].\"\"\"\n",
"        return [self.get_record(page) for page in db_response[\"results\"]]\n",
"\n",
"    def get_record(self, result):\n",
"        \"\"\"Map each supported property name of one result to its flattened value.\"\"\"\n",
"        return {\n",
"            name: self.get_property_value(prop)\n",
"            for name, prop in result[\"properties\"].items()\n",
"            if self.is_supported(prop)\n",
"        }\n",
"\n",
"    def is_supported(self, prop):\n",
"        \"\"\"Tell whether the property's type is one this converter handles.\"\"\"\n",
"        return prop.get(\"type\") in (\"checkbox\", \"date\", \"number\", \"rich_text\", \"title\")\n",
"\n",
"    def get_property_value(self, prop):\n",
"        \"\"\"Extract the value of a single property according to its type.\"\"\"\n",
"        kind = prop.get(\"type\")\n",
"        if kind in self.text_types:\n",
"            return self.get_text(prop)\n",
"        if kind == \"date\":\n",
"            return self.get_date(prop)\n",
"        # Remaining types keep their value directly under the type key.\n",
"        return prop.get(kind)\n",
"\n",
"    def get_text(self, text_object):\n",
"        \"\"\"Concatenate the plain_text of every fragment of a text property.\"\"\"\n",
"        fragments = text_object.get(text_object.get(\"type\"))\n",
"        return \"\".join(fragment.get(\"plain_text\") for fragment in fragments)\n",
"\n",
"    def get_date(self, date_object):\n",
"        \"\"\"Flatten a date property.\n",
"\n",
"        Returns None when no date is set, the start string when there is no\n",
"        end, and otherwise the timedelta between end and start.\n",
"        \"\"\"\n",
"        span = date_object.get(\"date\")\n",
"        if span is None:\n",
"            return None\n",
"        if span.get(\"end\") is None:\n",
"            return span.get(\"start\")\n",
"        began = datetime.fromisoformat(span.get(\"start\"))\n",
"        finished = datetime.fromisoformat(span.get(\"end\"))\n",
"        return finished - began\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"class PandasLoader():\n",
"    \"\"\"Downloads every page of a Notion database and builds a DataFrame from it.\"\"\"\n",
"\n",
"    def __init__(self, notion_client, pandas_converter):\n",
"        \"\"\"Store the collaborators used by load_db.\n",
"\n",
"        Args:\n",
"            notion_client: Object providing query_database(db_id, start_cursor=...).\n",
"            pandas_converter: Object providing response_to_records(response_json).\n",
"        \"\"\"\n",
"        self.notion_client = notion_client\n",
"        self.converter = pandas_converter\n",
"\n",
"    def load_db(self, db_id):\n",
"        \"\"\"Fetch all result pages of a database and return them as one DataFrame.\n",
"\n",
"        Pages are requested one after another, passing each response's\n",
"        next_cursor to the following request while has_more is set.\n",
"\n",
"        Raises:\n",
"            Whatever the response's raise_for_status() raises when a page\n",
"            request fails (requests.HTTPError for a requests response).\n",
"        \"\"\"\n",
"        records = []\n",
"        start_cursor = None\n",
"        page_count = 0\n",
"        while True:\n",
"            page_count += 1\n",
"            print(f\"Loading page {page_count}\")\n",
"            db_response = self.notion_client.query_database(db_id, start_cursor=start_cursor)\n",
"            # Stop on the first failed request: silently skipping it would either\n",
"            # return an empty frame or keep re-sending the same cursor forever.\n",
"            db_response.raise_for_status()\n",
"            db_response_obj = db_response.json()\n",
"            records.extend(self.converter.response_to_records(db_response_obj))\n",
"            if not db_response_obj.get(\"has_more\"):\n",
"                break\n",
"            start_cursor = db_response_obj.get(\"next_cursor\")\n",
"        return pd.DataFrame(records)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading page 1\n",
"Loading page 2\n",
"Loading page 3\n",
"Loading page 4\n",
"Loading page 5\n",
"Loading page 6\n",
"Loading page 7\n",
"Loading page 8\n",
"Loading page 9\n",
"Loading page 10\n",
"Loading page 11\n",
"Loading page 12\n",
"Loading page 13\n",
"Loading page 14\n",
"Loading page 15\n",
"Loading page 16\n",
"Loading page 17\n",
"Loading page 18\n",
"Loading page 19\n",
"Loading page 20\n",
"Loading page 21\n",
"Loading page 22\n",
"Loading page 23\n",
"Loading page 24\n",
"Loading page 25\n",
"Loading page 26\n",
"Loading page 27\n",
"Loading page 28\n",
"Loading page 29\n",
"Loading page 30\n",
"Loading page 31\n",
"Loading page 32\n",
"Loading page 33\n",
"Loading page 34\n",
"Loading page 35\n",
"Loading page 36\n",
"Loading page 37\n",
"Loading page 38\n",
"Loading page 39\n",
"Loading page 40\n",
"Loading page 41\n",
"Loading page 42\n",
"Loading page 43\n",
"Loading page 44\n",
"Loading page 45\n",
"Loading page 46\n",
"Loading page 47\n",
"Loading page 48\n",
"Loading page 49\n",
"Loading page 50\n",
"Loading page 51\n",
"Loading page 52\n",
"Loading page 53\n",
"Loading page 54\n",
"Loading page 55\n",
"Loading page 56\n",
"Loading page 57\n",
"Loading page 58\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>speaker</th>\n",
" <th>published_date</th>\n",
" <th>views</th>\n",
" <th>likes</th>\n",
" <th>talk_id</th>\n",
" <th>recorded_date</th>\n",
" <th>event</th>\n",
" <th>duration</th>\n",
" <th>title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Antonio Damasio</td>\n",
" <td>2011-12-19</td>\n",
" <td>2396029</td>\n",
" <td>71000</td>\n",
" <td>1308</td>\n",
" <td>2011-03-02</td>\n",
" <td>TED2011</td>\n",
" <td>1105</td>\n",
" <td>The quest to understand consciousness</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Farish Ahmad-Noor</td>\n",
" <td>2020-06-23</td>\n",
" <td>2074509</td>\n",
" <td>62000</td>\n",
" <td>64428</td>\n",
" <td>2019-09-28</td>\n",
" <td>TEDxNTU</td>\n",
" <td>729</td>\n",
" <td>Why is colonialism (still) romanticized?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Harald Haas</td>\n",
" <td>2011-08-02</td>\n",
" <td>2769918</td>\n",
" <td>83000</td>\n",
" <td>1202</td>\n",
" <td>2011-07-13</td>\n",
" <td>TEDGlobal 2011</td>\n",
" <td>755</td>\n",
" <td>Wireless data from every light bulb</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Esther Ndichu</td>\n",
" <td>2018-05-04</td>\n",
" <td>87610</td>\n",
" <td>2600</td>\n",
" <td>13014</td>\n",
" <td>2015-09-02</td>\n",
" <td>TED@UPS</td>\n",
" <td>698</td>\n",
" <td>Hunger isn't a food issue. It's a logistics issue</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Paola Antonelli</td>\n",
" <td>2008-10-15</td>\n",
" <td>403983</td>\n",
" <td>12000</td>\n",
" <td>372</td>\n",
" <td>2007-12-12</td>\n",
" <td>EG 2007</td>\n",
" <td>1038</td>\n",
" <td>Design and the Elastic Mind</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5696</th>\n",
" <td>Martin Rees</td>\n",
" <td>2014-08-25</td>\n",
" <td>1315891</td>\n",
" <td>39000</td>\n",
" <td>2067</td>\n",
" <td>2014-03-13</td>\n",
" <td>TED2014</td>\n",
" <td>403</td>\n",
" <td>Can we prevent the end of the world?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5697</th>\n",
" <td>Matilda Ho</td>\n",
" <td>2017-11-28</td>\n",
" <td>1522508</td>\n",
" <td>45000</td>\n",
" <td>3671</td>\n",
" <td>2017-04-24</td>\n",
" <td>TED2017</td>\n",
" <td>294</td>\n",
" <td>The future of good food in China</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5698</th>\n",
" <td>Stephen Bax</td>\n",
" <td>2019-02-22</td>\n",
" <td>8975610</td>\n",
" <td>269000</td>\n",
" <td>24485</td>\n",
" <td>2017-05-25</td>\n",
" <td>TED-Ed</td>\n",
" <td>262</td>\n",
" <td>The world's most mysterious book</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5699</th>\n",
" <td>Ethan Nadelmann</td>\n",
" <td>2014-11-12</td>\n",
" <td>2309479</td>\n",
" <td>69000</td>\n",
" <td>2130</td>\n",
" <td>2014-10-15</td>\n",
" <td>TEDGlobal 2014</td>\n",
" <td>1036</td>\n",
" <td>Why we need to end the War on Drugs</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5700</th>\n",
" <td>Simona Francese</td>\n",
" <td>2018-09-17</td>\n",
" <td>2611782</td>\n",
" <td>78000</td>\n",
" <td>20390</td>\n",
" <td>2018-04-10</td>\n",
" <td>TED2018</td>\n",
" <td>596</td>\n",
" <td>Your fingerprints reveal more than you think</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5701 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" speaker published_date views likes talk_id \\\n",
"0 Antonio Damasio 2011-12-19 2396029 71000 1308 \n",
"1 Farish Ahmad-Noor 2020-06-23 2074509 62000 64428 \n",
"2 Harald Haas 2011-08-02 2769918 83000 1202 \n",
"3 Esther Ndichu 2018-05-04 87610 2600 13014 \n",
"4 Paola Antonelli 2008-10-15 403983 12000 372 \n",
"... ... ... ... ... ... \n",
"5696 Martin Rees 2014-08-25 1315891 39000 2067 \n",
"5697 Matilda Ho 2017-11-28 1522508 45000 3671 \n",
"5698 Stephen Bax 2019-02-22 8975610 269000 24485 \n",
"5699 Ethan Nadelmann 2014-11-12 2309479 69000 2130 \n",
"5700 Simona Francese 2018-09-17 2611782 78000 20390 \n",
"\n",
" recorded_date event duration \\\n",
"0 2011-03-02 TED2011 1105 \n",
"1 2019-09-28 TEDxNTU 729 \n",
"2 2011-07-13 TEDGlobal 2011 755 \n",
"3 2015-09-02 TED@UPS 698 \n",
"4 2007-12-12 EG 2007 1038 \n",
"... ... ... ... \n",
"5696 2014-03-13 TED2014 403 \n",
"5697 2017-04-24 TED2017 294 \n",
"5698 2017-05-25 TED-Ed 262 \n",
"5699 2014-10-15 TEDGlobal 2014 1036 \n",
"5700 2018-04-10 TED2018 596 \n",
"\n",
" title \n",
"0 The quest to understand consciousness \n",
"1 Why is colonialism (still) romanticized? \n",
"2 Wireless data from every light bulb \n",
"3 Hunger isn't a food issue. It's a logistics issue \n",
"4 Design and the Elastic Mind \n",
"... ... \n",
"5696 Can we prevent the end of the world? \n",
"5697 The future of good food in China \n",
"5698 The world's most mysterious book \n",
"5699 Why we need to end the War on Drugs \n",
"5700 Your fingerprints reveal more than you think \n",
"\n",
"[5701 rows x 9 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"# Index os.environ directly so that a missing NOTION_KEY raises KeyError here,\n",
"# instead of passing None on as the key.\n",
"client = NotionClient(os.environ[\"NOTION_KEY\"])\n",
"converter = PandasConverter()\n",
"loader = PandasLoader(client, converter)\n",
"# Downloads every page of the database; large databases take a while.\n",
"df = loader.load_db(notion_database_id)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>views</th>\n",
" <th>likes</th>\n",
" <th>talk_id</th>\n",
" <th>duration</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5.701000e+03</td>\n",
" <td>5.701000e+03</td>\n",
" <td>5701.000000</td>\n",
" <td>5701.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2.147123e+06</td>\n",
" <td>6.386154e+04</td>\n",
" <td>26577.133135</td>\n",
" <td>706.021751</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>3.711874e+06</td>\n",
" <td>1.098655e+05</td>\n",
" <td>30530.857641</td>\n",
" <td>519.196825</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.560000e+02</td>\n",
" <td>1.300000e+01</td>\n",
" <td>1.000000</td>\n",
" <td>60.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>6.859920e+05</td>\n",
" <td>2.000000e+04</td>\n",
" <td>1660.000000</td>\n",
" <td>354.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.405696e+06</td>\n",
" <td>4.200000e+04</td>\n",
" <td>10368.000000</td>\n",
" <td>679.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.189406e+06</td>\n",
" <td>6.500000e+04</td>\n",
" <td>51783.000000</td>\n",
" <td>924.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>7.393596e+07</td>\n",
" <td>2.200000e+06</td>\n",
" <td>98843.000000</td>\n",
" <td>20404.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" views likes talk_id duration\n",
"count 5.701000e+03 5.701000e+03 5701.000000 5701.000000\n",
"mean 2.147123e+06 6.386154e+04 26577.133135 706.021751\n",
"std 3.711874e+06 1.098655e+05 30530.857641 519.196825\n",
"min 4.560000e+02 1.300000e+01 1.000000 60.000000\n",
"25% 6.859920e+05 2.000000e+04 1660.000000 354.000000\n",
"50% 1.405696e+06 4.200000e+04 10368.000000 679.000000\n",
"75% 2.189406e+06 6.500000e+04 51783.000000 924.000000\n",
"max 7.393596e+07 2.200000e+06 98843.000000 20404.000000"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Analysis Example\n",
"This code assumes that your data comes from the [Ted Talks Dataset](https://www.kaggle.com/datasets/miguelcorraljr/ted-talks-2022) available from Kaggle. But you can adjust it to something you already have in Notion!"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"speaker_count = df.groupby('speaker')['speaker'].count()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"47"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"max_speaker_count = speaker_count.max()\n",
"max_speaker_count"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1e6dabcf8b0>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"speaker_count.plot(kind='hist', figsize=(15,10), bins=max_speaker_count)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So the vast majority of TED speakers have given just one talk, but there's a long tail out to 47. Who are those prolific speakers?"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"speaker\n",
"Alex Gendler 47\n",
"Iseult Gillespie 37\n",
"Matt Walker 18\n",
"Elizabeth Cox 15\n",
"Alex Rosenthal 15\n",
" TED-Ed 15\n",
"Emma Bryce 12\n",
"Daniel Finkel 11\n",
"Jen Gunter 11\n",
"Juan Enriquez 11\n",
"Hans Rosling 9\n",
"Bill Gates 9\n",
"Wendy De La Rosa 9\n",
"Dan Finkel 9\n",
"Greg Gage 9\n",
"Mona Chalabi 9\n",
"Fabio Pacucci 8\n",
"Dan Kwartler 7\n",
"Al Gore 7\n",
"Marco Tempest 7\n",
"Name: speaker, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_20_speakers = speaker_count.sort_values(ascending=False)[:20]\n",
"top_20_speakers"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1e6dae25220>"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"top_20_speakers.plot(kind='bar', figsize=(15,10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I looked up Alex Gendler, and it turns out he has made a lot of what are called [TED-Ed Lessons](https://ed.ted.com/search?qs=gendler), which seem to be different from a typical TED talk. Let's exclude that event - we can filter it out by looking at the event column."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['TED2011', 'TEDxNTU', 'TEDGlobal 2011', 'TED@UPS', 'EG 2007',\n",
" 'TEDWomen 2021', 'Sleeping with Science', 'TED2018', 'TEDxOgden',\n",
" 'TED2017', 'TEDxUSC', 'TED-Ed', 'Small Thing Big Idea', 'TED2007',\n",
" 'Countdown', 'TED2009', 'TEDxMidAtlantic', 'TED2019',\n",
" 'TEDMED 2009', 'TEDxStockholm', 'TEDGlobal 2009', 'TEDSummit',\n",
" 'TED1990', 'TEDGlobal 2017', 'TEDxMileHigh', 'TED@BCG Berlin',\n",
" 'TED2020 ', 'TED2016', 'TED Salon Zebra Technologies',\n",
" 'TED@BCG Paris', 'TED2013', 'TED@NAS', 'TEDxDoha',\n",
" 'TEDxManhattanBeach', 'TED@BCG Mumbai', 'TED in the Field',\n",
" 'TEDSummit 2019', 'TED Studio', 'TEDxAmazonia', 'TEDxBeaconStreet',\n",
" 'TEDxRiodelaPlata', 'TED@IBM', 'TEDGlobal 2005', 'TED Membership',\n",
" 'TED Residency', 'TED2002', 'TED Salon Brightline Initiative',\n",
" 'TED-Ed Weekend', 'TEDGlobal 2012', 'TED2004', 'TED@Unilever',\n",
" 'TED2014', 'TEDMED 2018', 'TEDGlobal 2013',\n",
" 'TED@BCG San Francisco', 'TEDxJacksonville', 'TEDMED 2020',\n",
" 'DLD 2007', 'TED2022', 'TED2010', 'TEDxLusakaStudio',\n",
" 'TEDWomen 2020', 'TEDMED 2016', 'TED Salon: Belonging', 'TED2008',\n",
" 'TEDIndia 2009', 'TEDxMaastricht', 'TEDxKakumaCamp',\n",
" 'TEDxCopenhagen', 'TEDCity2.0', 'Body Stuff with Dr. Jen Gunter',\n",
" 'TEDWomen 2010', 'TEDNYC', 'TEDxNASA', 'TED@BCG Milan',\n",
" 'The Way We Work', 'TEDxCambridgeSalon', 'TED2003',\n",
" 'TEDWomen 2019', 'We the Future',\n",
" 'TED Countdown New York Session 2022', 'TED2015', 'TEDMonterey',\n",
" 'TEDGlobal>London', 'TEDWomen 2015', 'TEDSalon Berlin 2014',\n",
" 'TEDGlobal 2014', 'TEDxAuckland', 'TEDxVienna', 'TEDxInnsbruck',\n",
" 'TEDxPhoenix', 'TEDxMarin', 'TEDxGroningen', 'TED2005', 'TEDxTeen',\n",
" 'TEDxBoston', 'TEDxCambridge', 'TEDxFrankfurt', 'TEDxJaffa 2012',\n",
" 'TED@NYC', 'TEDSalon NY2013', 'Toronto Youth Corps',\n",
" 'TED Talks Live', 'TED2012', 'TED@Tommy',\n",
" 'TEDxOhioStateUniversity', 'TEDSalon 2007 Hot Science',\n",
" 'TED@State Street Boston', 'TED2006', 'Skoll World Forum 2007',\n",
" 'TEDxBG', 'DIY Neuroscience', 'TEDWomen 2018', 'TEDxDelft',\n",
" 'TED@State Street London', 'TEDxPenn', 'TEDxUSU', 'TEDxTysons',\n",
" 'TEDxOilSpill', 'TEDGlobal 2010', 'TED Salon Border Stories',\n",
" 'TEDYouth 2013', 'Your Money and Your Mind', 'TEDxDanubia',\n",
" 'TEDxCSU', 'TED Salon: Education Everywhere', 'TEDxMadrid',\n",
" 'TEDxChange', 'TEDxEastVan', 'TED Salon Doha Debates',\n",
" 'TED Fellows: Shape Your Future', 'Taste3 2008', 'TEDxExeter',\n",
" 'TEDxOU', 'TEDxOxford', 'TEDxSanDiego', 'TED Salon Optum',\n",
" 'TEDxManchester', 'TEDWomen 2017', 'TEDxRainier',\n",
" 'TEDxBoston 2012', 'TEDxSantaClaraUniversity',\n",
" 'TEDxImperialCollege', 'TEDxNewYork', 'Mission Blue Voyage',\n",
" 'TEDxIndianapolis', 'TEDSalon NY2014', 'TEDxCannes',\n",
" 'TED Salon Novo Nordisk', 'TEDxCharlottesville',\n",
" 'Countdown Summit', 'TEDxMtHood', 'TEDxMaui', 'TEDxWrigleyville',\n",
" 'TEDxCanberra', 'TED Audio Collective', 'TEDxBerkeley',\n",
" 'TEDxDayton', 'TEDxNewcastle', 'TEDxUCincinnati', 'TED@Westpac',\n",
" 'TEDSalon NY2011', 'TED1998', 'TEDGlobal 2007', 'TEDMED 2012',\n",
" 'TEDxCordoba', 'TEDWomen 2016', 'TEDxPSU',\n",
" 'Business Innovation Factory', 'TEDxShimizu', 'TEDxSydney',\n",
" 'TED@DuPont', 'TEDxGeorgetown', 'TEDxSingapore', 'TEDxZurich 2012',\n",
" 'TEDxWarwick', 'TEDxCERN', 'TEDxKids@Brussels', 'TEDxStanford',\n",
" 'TEDxRockville', 'TEDGlobal>Geneva', 'TED@BCG London',\n",
" 'TEDxDublin', 'TED2001', 'TEDxStCloud', 'TEDxUppsalaUniversity',\n",
" 'TEDxCreativeCoast', 'TEDxHousesOfParliament',\n",
" 'Carnegie Mellon University', 'TEDWomen 2013', 'EG 2008',\n",
" 'TEDxParis 2012', 'TEDxSFU', 'New York State Senate',\n",
" 'TEDActive 2014', 'TEDxCornellU', 'TED@Cannes',\n",
" 'TED Salon Verizon', 'TEDxGateway', 'TEDxWomen 2011',\n",
" 'TED@BCG Toronto', 'TEDxToronto 2010', 'TEDxSummit',\n",
" 'TEDxDeExtinction', 'TEDxLondon', 'TEDxLinnaeusUniversity',\n",
" 'TEDxOslo', 'TED@Merck KGaA, Darmstadt, Germany',\n",
" 'TEDxBinghamtonUniversity', 'Am I Normal? with Mona Chalabi',\n",
" 'TEDxUCLA', 'TEDxYouth@Manchester', 'TEDSalon NY2012',\n",
" 'TEDSalon London Fall 2012', 'TEDxVancouver', 'TED@WellsFargo',\n",
" 'TEDxYouth@Beaconstreet', 'TEDxDirigo', 'TEDxCaltech',\n",
" 'TEDxUCDavis', 'TEDxPortland', 'TEDxMidAtlantic 2013',\n",
" 'Torchbearers', 'TEDxKlagenfurt', 'TEDxZurich',\n",
" 'TED Salon American Family Insurance', 'TEDxLondonWomen',\n",
" 'TEDxYouth@München', 'TEDxAthens', 'TEDxTarragona',\n",
" 'TEDYouth 2014', 'TEDxManhattan', 'TEDxVermilionStreet',\n",
" 'TED Talks Education', 'TEDxRoma', 'TEDMED 2013', 'TED Connects',\n",
" 'TEDxPurdueU', 'TEDxToulouse', 'TED@BCG', 'TEDxVirginiaTech',\n",
" 'TEDxSMCC', 'TEDxKids@Ambleside', 'TED@State', 'TEDxAustin',\n",
" 'TEDxBerlin', 'TED Talks India', 'TEDxBoston 2011',\n",
" 'TEDxSaltLakeCity', 'TEDxSanFrancisco', 'TEDxGatewayWomen',\n",
" 'TEDActive 2011', 'TEDxSeattleU', 'TEDxKC', 'TEDxMileHighWomen',\n",
" 'TEDxGöteborg 2010', 'TEDxCHUV', 'TEDMED 2011',\n",
" 'TED Fellows Retreat 2013', 'TEDMED 2017',\n",
" 'TEDxUniversityofNevada', 'LIFT 2007', 'TEDxBrussels',\n",
" 'TEDxOrangeCoast', 'TED@BCG Singapore', 'TED@Intel',\n",
" 'TEDxPennsylvaniaAvenue', '2015', 'TEDxAtlanta', 'TEDxToronto',\n",
" 'TEDxOshkosh', 'TEDxSeoul', 'TED Legacy Project',\n",
" 'TEDxHopeCollege', 'TED Salon DWEN',\n",
" 'In the Green: The Business of Climate Action', 'TEDMED 2015',\n",
" 'TED Salon U.S. Air Force', 'TEDxWanChai', 'TED Masterclass',\n",
" 'TEDxSanFranciscoSalon', 'TEDxChristchurch', 'TEDxUND',\n",
" 'TED Salon UNICEF', 'TEDxDubai', 'TEDxColumbus', 'TEDxKrakow',\n",
" 'TEDxEast', 'TEDxDebrecenUniversity', 'TED Talks India: Nayi Baat',\n",
" 'Build Back Better', 'TEDSalon London Spring 2011', 'TEDxSeattle',\n",
" 'TEDYouth 2011', 'TEDMED 2014', 'Serious Play 2008',\n",
" 'TEDxToronto 2011', 'TEDxPerth', 'TEDxNextGenerationAsheville',\n",
" 'TEDxSoMa', 'TEDxUofT', 'TEDxNashville', 'TEDxNorrkoping',\n",
" 'TEDxUofM', 'TED Fellows Retreat 2015',\n",
" 'Justice with Michael Sandel', 'TEDxRosario 2017',\n",
" 'TEDActive 2015', 'TEDxSMU', 'TEDxGlasgow', 'TEDxPeachtree',\n",
" 'TEDxNYED', 'Chautauqua Institution', 'TEDxOakParkWomen',\n",
" 'TEDxYouth@Sydney', 'TEDSalon London 2010', 'TEDMED 2010',\n",
" 'TEDxSandhillsCommunityCollege',\n",
" 'How to Deal with Difficult Feelings', 'TEDxBloomington',\n",
" 'TED Dialogues', 'TEDxLondonBusinessSchool',\n",
" 'TED Salon Bezos Scholars', 'TEDxTokyo', 'TEDxStLouisWomen',\n",
" 'TED Salon The Macallan', 'TED en Español en NYC', 'TEDxYYC',\n",
" 'TEDxUniversityofRochester', 'TEDxSurrey', 'SoulPancake',\n",
" 'INK Conference', 'Full Spectrum Auditions', 'TEDxLeicester',\n",
" 'TEDSalon London Spring 2012', 'TEDxBeirut',\n",
" 'TEDxHampshireCollege', 'TEDxBristol', 'TED@Bangalore',\n",
" 'DICE Summit 2010', 'TEDxOrlando', 'TEDxZurich 2013', 'TEDxSBU',\n",
" 'TEDxFiDiWomen', 'TEDxSalem', 'TEDxKyoto',\n",
" 'Countdown Global Livestream', 'TEDxUF', 'TEDGlobal>NYC',\n",
" 'TEDxBend', 'TEDxVeniceBeach', 'TEDxCoventGardenWomen',\n",
" 'TEDxIslay', 'TEDxSeattleWomen', 'TEDxLSU', 'TEDSalon London 2009',\n",
" 'TEDSalon 2006', 'Monday.com', 'TEDxGalway', 'TEDxMiamiUniversity',\n",
" 'TEDxVCU', 'Design Matters with Debbie Millman',\n",
" 'TEDxColoradoSprings', 'The Hartford', 'TEDxOakland',\n",
" 'TEDxPittsburgh', 'TEDGlobalLondon', 'Princeton University',\n",
" 'TEDxMemphis', 'Far Flung', 'TEDYouth 2015', 'TEDxSanQuentin',\n",
" 'TEDxParramatta', 'TED Salon Belonging',\n",
" 'TEDSalon 2009 Compassion', 'TEDxMICA', 'TEDxAmsterdamWomen',\n",
" 'TEDxThessaloniki', 'TEDxBerkleeValencia', 'TEDxHogeschoolUtrecht',\n",
" 'TEDxMidAtlantic 2017', 'TEDxHouston', 'Mission Blue II',\n",
" 'TED Salon: Border Stories', 'TEDxPasadena', 'TEDxFoggyBottom',\n",
" 'TEDxBoulder', 'TEDxSanMigueldeAllende', 'TEDxYouth@CEHS',\n",
" 'TED@Nairobi', 'TEDPrize@UN', 'TEDxSydneySalon', 'TEDxParis 2010',\n",
" 'TEDxLeuvenSalon', 'TEDxCMU', 'TEDxPlaceDesNations',\n",
" 'TED Salon UNDP', 'TEDxFondduLac', 'TED@PMI', 'TEDxBasel',\n",
" 'TED Fellows 2015', 'TEDxEuston', 'TEDxDetroit', 'TEDxBoston 2010',\n",
" 'TEDxSiouxFalls', 'TEDActive 2013', 'TEDxValladolid',\n",
" 'TEDxCuauhtémoc', 'TEDxColbyCollege', 'TEDxZurich 2011',\n",
" 'TEDxMinneapolis', 'TEDxSussexUniversity', 'TEDxUM',\n",
" 'TEDxVillanovaU', 'TED Salon: Radical Craft', 'Bowery Poetry Club',\n",
" 'TEDxNaperville', 'TEDxGreatPacificGarbagePatch',\n",
" 'The TED Interview', 'TEDxHull', 'Arbejdsglaede Live', 'TEDxUMKC',\n",
" 'TEDxSouthBank', 'TEDxNSU', 'TEDxOrcasIsland', 'TEDxUFM',\n",
" 'TEDxCUNY', 'TEDxBoulder 2011', 'TEDxNorthwesternU',\n",
" 'Web 2.0 Expo 2008', 'TEDxUniversityofMississippi', 'TEDxCU',\n",
" 'Global Witness', 'TED@New York', 'TEDxMidwest', 'TEDxAmsterdam',\n",
" 'TEDLagos Ideas Search', 'TEDxParis', 'TEDxHultLondon',\n",
" 'TEDxWomen 2012', 'TEDxABQ', 'TEDxBratislava',\n",
" 'TEDxRotterdam 2010', 'TEDxMidAtlanticSalon', 'TEDxMalvern',\n",
" 'TEDxSaintThomas', 'TEDxBoston 2009', 'TEDxEindhoven',\n",
" 'TEDxBrighton', 'TEDxStLouis', 'TEDxConcordiaUPortland',\n",
" 'RSA Animate', 'Gel Conference', 'TEDxWinnipeg',\n",
" 'TED Salon Samsung', 'TEDxBrisbane', 'TEDxGoodenoughCollege',\n",
" 'TEDxRiga', 'ZigZag', 'TEDxDU 2010', 'TEDxRosario', 'TEDxLeuven',\n",
" 'Stanford University', 'TEDxStormontWomen', 'TEDxMuncyStatePrison',\n",
" 'TEDxProvincetown', 'TEDxBoise', 'TEDxYorkU', 'TEDxCrenshaw',\n",
" 'TEDxMonashUniversityMalaysia', 'TEDxSF',\n",
" 'TED Salon Education Everywhere', 'TEDxHerndon',\n",
" 'TED Salon Dell Technologies', 'TED1984', 'TEDxSalford',\n",
" 'TEDxDU 2011', 'TEDxUTAustin', 'TEDxMIT',\n",
" 'TEDSalon London Fall 2011', 'TEDxUIdaho', 'TED@SXSWi',\n",
" 'TEDSalon 2008', 'TEDxColumbusWomen', 'TEDxMalagueta',\n",
" 'TEDxUWLaCrosse', 'TEDxMet', 'TEDxWellington', 'TEDxNantes',\n",
" 'TEDxNijmegen', 'TEDxDePaulUniversity', 'Handheld Learning',\n",
" 'TEDxFIU', 'TEDxGhent', 'TEDxIEMadrid', 'TEDxWaterloo',\n",
" 'Fort Worth City Council', 'TEDxMcMinnville',\n",
" '2021 US Presidential Inauguration', 'University of California',\n",
" 'TEDxEastEnd', 'AORN Congress', \"TEDxO'Porto\",\n",
" 'TEDxGoldenGatePark 2012', 'TEDxNashvilleSalon', 'TEDxRC2',\n",
" 'TEDxUWA', 'TEDxPitic', 'TEDxNASA@SiliconValley', 'TEDxChapmanU',\n",
" 'TEDxFargo', 'TEDxBahiaBlanca', 'TED@Johannesburg',\n",
" 'TEDxSantaCruz', 'TEDxGatewayArch', 'TEDxNashvilleWomen',\n",
" 'Harvard University', 'TEDxLivoniaCCLibrary', 'TEDxPortofSpain',\n",
" 'TEDxJaffa 2013', 'TEDxUSN', 'TEDxTemecula', 'TED@MotorCity',\n",
" 'TEDxUIUC', 'WorkLife with Adam Grant', 'TEDxTufts', 'TEDxJHU',\n",
" 'TEDxCreightonU', 'TEDxUofSC', 'TEDxBroadway',\n",
" 'TED Global Idea Search 2021', 'TEDxArendal',\n",
" 'TEDxCambridgeUniversity', 'TEDxCaFoscariU', 'TEDxLincolnSquare',\n",
" 'TEDxClearBrookHighSchool', 'The Do Lectures',\n",
" 'Elizabeth G. Anderson School', 'TEDxTelAviv 2010',\n",
" 'taken for granted', 'TEDxPuget Sound', 'TEDxBellevue', 'TEDxNewy',\n",
" 'TEDxTC', 'TEDxCesena', 'TEDxAmoskeagMillyard', 'TEDxTinHau',\n",
" 'TEDxUofW', 'TEDxSuffolkUniversity', 'TEDxEQChCh',\n",
" 'TEDxCapeTownWomen', 'TEDxObserver', 'TEDxRapidCity',\n",
" 'TEDxJacksonHole', 'TEDxSkoll', 'TEDxBariloche', 'TEDxProvidence',\n",
" 'TEDxEdmonton', 'TEDxMonterey', 'TEDxKeene', 'TEDxQueensU',\n",
" 'TEDxSnoIsleLibraries', 'TEDxAustinWomen', 'TED1994', 'TEDxSSE',\n",
" 'TEDxGuangzhou', 'TED and Minderoo Foundation',\n",
" 'TEDNairobi Ideas Search', 'TEDxTaipei', 'TEDxAsheville',\n",
" 'TEDxCapeMay', \"Eric Whitacre's Virtual Choir\", 'TEDxGrandRapids',\n",
" 'TEDxUCSD', 'TEDxOmaha', 'TEDxDesMoines', 'TEDYouth 2012',\n",
" 'TED@London', 'TEDxIndianaUniversity', 'TEDxNatick',\n",
" 'TEDxWitsUniversity', 'TEDxConnecticutCollege', 'TED Prize Wish',\n",
" 'World Science Festival', 'TEDxFergusonLibrary',\n",
" 'TEDxCoconutGrove', 'TEDxSHHS', 'TEDxUTFSM', 'TEDx SHORTS',\n",
" 'TEDxUCIrvine', 'TEDxUW', 'Checking In with Susan David',\n",
" 'TEDSalon NY2015', 'TEDxConcorde', 'TEDxYouth@Bath',\n",
" 'TEDxEasthamptonWomen', 'TEDxMonroeCorrectionalComplex',\n",
" 'TEDxAmericanRiviera', 'Michael Howard Studios',\n",
" 'TEDxBeaconStreetSalon', 'American Family Insurance',\n",
" 'TEDxWalthamED', 'Conversations with People Who Hate Me',\n",
" 'TEDxTimberlaneSchools', 'TEDxHelvetia', 'TEDxHamburg', 'BBC TV',\n",
" 'TEDxMIA', 'TEDxMontreal', 'TEDxTAMUSalon', 'TEDxUdeM',\n",
" 'TEDxFulbrightDublin', 'TEDxBerkshires', '', 'TEDxSiliconValley',\n",
" 'TEDxSonomaCounty', 'TEDxBocaRaton', 'TEDxCherryCreekWomen',\n",
" 'TEDxSavannah', 'TEDxUCL', 'TEDxReus', 'Currently',\n",
" 'TEDxAlbertopolis', 'TEDxPennQuarter', 'TEDxSanJoseCA',\n",
" 'NextGen:Charity', 'TEDxAarhus', 'TEDxCalzadaDeLosHéroes',\n",
" 'Royal Institution', 'TEDxUniversityofGlasgow',\n",
" 'TED Senior Fellows at TEDGlobal 2010', 'TEDxClaremontColleges',\n",
" 'TEDxPaloAlto', 'TEDxParcDuCinquantenaire', 'TEDxUGA',\n",
" 'TEDxYouth@Valladolid', 'TEDxUHasselt', 'TEDxVictoria'],\n",
" dtype=object)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['event'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"speaker\n",
"Matt Walker 18\n",
"Jen Gunter 11\n",
"Juan Enriquez 11\n",
"Hans Rosling 9\n",
"Wendy De La Rosa 9\n",
"Mona Chalabi 9\n",
"Bill Gates 9\n",
"Greg Gage 8\n",
"Marco Tempest 7\n",
"Al Gore 7\n",
"Michael Green 6\n",
"Rives 6\n",
"Jacqueline Novogratz 6\n",
"Dan Ariely 6\n",
"Barry Schwartz 5\n",
"Paola Antonelli 5\n",
"Julian Treasure 5\n",
"Kristen Bell + Giant Ant 5\n",
"Clay Shirky 5\n",
"A.J. Jacobs 5\n",
"Name: speaker, dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"not_ted_ed = df[df['event'] != 'TED-Ed']\n",
"not_ted_ed_speaker_count = not_ted_ed.groupby('speaker')['speaker'].count()\n",
"not_ted_ed_top_20_speakers = not_ted_ed_speaker_count.sort_values(ascending=False)[:20]\n",
"not_ted_ed_top_20_speakers"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1e6daea3a90>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"not_ted_ed_speaker_count.plot(kind='hist', figsize=(15,10), bins=not_ted_ed_speaker_count.max())"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1e6db3c37c0>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"not_ted_ed_top_20_speakers.plot(kind='bar', figsize=(15,10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So the most prolific TED speaker, excluding TED-Ed content, is sleep scientist [Matt Walker](https://www.ted.com/speakers/matthew_walker)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment