Skip to content

Instantly share code, notes, and snippets.

@simonw
Created July 17, 2020 20:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonw/bca9f55391607056dc75ea77b6124be1 to your computer and use it in GitHub Desktop.
Save simonw/bca9f55391607056dc75ea77b6124be1 to your computer and use it in GitHub Desktop.
Using Datasette with Pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Datasette and Pandas"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://covid-19.datasettes.com/covid/latest_ny_times_counties_with_populations.csv?_stream=on&_size=max\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>county</th>\n",
" <th>state</th>\n",
" <th>fips</th>\n",
" <th>cases</th>\n",
" <th>deaths</th>\n",
" <th>population</th>\n",
" <th>deaths_per_million</th>\n",
" <th>cases_per_million</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2020-07-16</td>\n",
" <td>Trousdale</td>\n",
" <td>Tennessee</td>\n",
" <td>47169</td>\n",
" <td>1528</td>\n",
" <td>6</td>\n",
" <td>11284</td>\n",
" <td>531</td>\n",
" <td>135412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2020-07-16</td>\n",
" <td>Lake</td>\n",
" <td>Tennessee</td>\n",
" <td>47095</td>\n",
" <td>698</td>\n",
" <td>0</td>\n",
" <td>7016</td>\n",
" <td>0</td>\n",
" <td>99486</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2020-07-16</td>\n",
" <td>Lee</td>\n",
" <td>Arkansas</td>\n",
" <td>5077</td>\n",
" <td>854</td>\n",
" <td>5</td>\n",
" <td>8857</td>\n",
" <td>564</td>\n",
" <td>96420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2020-07-16</td>\n",
" <td>Dakota</td>\n",
" <td>Nebraska</td>\n",
" <td>31043</td>\n",
" <td>1864</td>\n",
" <td>41</td>\n",
" <td>20026</td>\n",
" <td>2047</td>\n",
" <td>93078</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2020-07-16</td>\n",
" <td>Buena Vista</td>\n",
" <td>Iowa</td>\n",
" <td>19021</td>\n",
" <td>1745</td>\n",
" <td>12</td>\n",
" <td>19620</td>\n",
" <td>611</td>\n",
" <td>88939</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3078</th>\n",
" <td>2020-07-16</td>\n",
" <td>Summers</td>\n",
" <td>West Virginia</td>\n",
" <td>54089</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>12573</td>\n",
" <td>0</td>\n",
" <td>159</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>2020-07-16</td>\n",
" <td>Blaine</td>\n",
" <td>Montana</td>\n",
" <td>30005</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6681</td>\n",
" <td>0</td>\n",
" <td>149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3080</th>\n",
" <td>2020-07-16</td>\n",
" <td>Grant</td>\n",
" <td>Oregon</td>\n",
" <td>41023</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7199</td>\n",
" <td>0</td>\n",
" <td>138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3081</th>\n",
" <td>2020-07-16</td>\n",
" <td>Lee</td>\n",
" <td>Kentucky</td>\n",
" <td>21129</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7403</td>\n",
" <td>0</td>\n",
" <td>135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3082</th>\n",
" <td>2020-07-16</td>\n",
" <td>Webster</td>\n",
" <td>West Virginia</td>\n",
" <td>54101</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>8114</td>\n",
" <td>0</td>\n",
" <td>123</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3083 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" date county state fips cases deaths \\\n",
"0 2020-07-16 Trousdale Tennessee 47169 1528 6 \n",
"1 2020-07-16 Lake Tennessee 47095 698 0 \n",
"2 2020-07-16 Lee Arkansas 5077 854 5 \n",
"3 2020-07-16 Dakota Nebraska 31043 1864 41 \n",
"4 2020-07-16 Buena Vista Iowa 19021 1745 12 \n",
"... ... ... ... ... ... ... \n",
"3078 2020-07-16 Summers West Virginia 54089 2 0 \n",
"3079 2020-07-16 Blaine Montana 30005 1 0 \n",
"3080 2020-07-16 Grant Oregon 41023 1 0 \n",
"3081 2020-07-16 Lee Kentucky 21129 1 0 \n",
"3082 2020-07-16 Webster West Virginia 54101 1 0 \n",
"\n",
" population deaths_per_million cases_per_million \n",
"0 11284 531 135412 \n",
"1 7016 0 99486 \n",
"2 8857 564 96420 \n",
"3 20026 2047 93078 \n",
"4 19620 611 88939 \n",
"... ... ... ... \n",
"3078 12573 0 159 \n",
"3079 6681 0 149 \n",
"3080 7199 0 138 \n",
"3081 7403 0 135 \n",
"3082 8114 0 123 \n",
"\n",
"[3083 rows x 9 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pandas.read_csv(url)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"sql = \"\"\"\n",
"select\n",
" date,\n",
" county,\n",
" state,\n",
" fips,\n",
" cases,\n",
" deaths,\n",
" population,\n",
" deaths_per_million,\n",
" cases_per_million\n",
"from\n",
" latest_ny_times_counties_with_populations\n",
"where\n",
" state = 'Texas'\n",
"order by\n",
" cases_per_million desc\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from urllib.parse import urlencode"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://covid-19.datasettes.com/covid.csv?\" + urlencode({\n",
" \"sql\": sql\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>county</th>\n",
" <th>state</th>\n",
" <th>fips</th>\n",
" <th>cases</th>\n",
" <th>deaths</th>\n",
" <th>population</th>\n",
" <th>deaths_per_million</th>\n",
" <th>cases_per_million</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2020-07-16</td>\n",
" <td>Moore</td>\n",
" <td>Texas</td>\n",
" <td>48341</td>\n",
" <td>924</td>\n",
" <td>15</td>\n",
" <td>20940</td>\n",
" <td>716</td>\n",
" <td>44126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2020-07-16</td>\n",
" <td>Walker</td>\n",
" <td>Texas</td>\n",
" <td>48471</td>\n",
" <td>2513</td>\n",
" <td>28</td>\n",
" <td>72971</td>\n",
" <td>383</td>\n",
" <td>34438</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2020-07-16</td>\n",
" <td>Anderson</td>\n",
" <td>Texas</td>\n",
" <td>48001</td>\n",
" <td>1956</td>\n",
" <td>4</td>\n",
" <td>57735</td>\n",
" <td>69</td>\n",
" <td>33878</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2020-07-16</td>\n",
" <td>Titus</td>\n",
" <td>Texas</td>\n",
" <td>48449</td>\n",
" <td>1030</td>\n",
" <td>6</td>\n",
" <td>32750</td>\n",
" <td>183</td>\n",
" <td>31450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2020-07-16</td>\n",
" <td>Jones</td>\n",
" <td>Texas</td>\n",
" <td>48253</td>\n",
" <td>608</td>\n",
" <td>0</td>\n",
" <td>20083</td>\n",
" <td>0</td>\n",
" <td>30274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>244</th>\n",
" <td>2020-07-16</td>\n",
" <td>Kinney</td>\n",
" <td>Texas</td>\n",
" <td>48271</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>3667</td>\n",
" <td>0</td>\n",
" <td>1090</td>\n",
" </tr>\n",
" <tr>\n",
" <th>245</th>\n",
" <td>2020-07-16</td>\n",
" <td>Foard</td>\n",
" <td>Texas</td>\n",
" <td>48155</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1155</td>\n",
" <td>0</td>\n",
" <td>865</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>2020-07-16</td>\n",
" <td>Hall</td>\n",
" <td>Texas</td>\n",
" <td>48191</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2964</td>\n",
" <td>337</td>\n",
" <td>674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>247</th>\n",
" <td>2020-07-16</td>\n",
" <td>Coleman</td>\n",
" <td>Texas</td>\n",
" <td>48083</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>8175</td>\n",
" <td>0</td>\n",
" <td>611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>248</th>\n",
" <td>2020-07-16</td>\n",
" <td>Baylor</td>\n",
" <td>Texas</td>\n",
" <td>48023</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3509</td>\n",
" <td>0</td>\n",
" <td>284</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>249 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" date county state fips cases deaths population \\\n",
"0 2020-07-16 Moore Texas 48341 924 15 20940 \n",
"1 2020-07-16 Walker Texas 48471 2513 28 72971 \n",
"2 2020-07-16 Anderson Texas 48001 1956 4 57735 \n",
"3 2020-07-16 Titus Texas 48449 1030 6 32750 \n",
"4 2020-07-16 Jones Texas 48253 608 0 20083 \n",
".. ... ... ... ... ... ... ... \n",
"244 2020-07-16 Kinney Texas 48271 4 0 3667 \n",
"245 2020-07-16 Foard Texas 48155 1 0 1155 \n",
"246 2020-07-16 Hall Texas 48191 2 1 2964 \n",
"247 2020-07-16 Coleman Texas 48083 5 0 8175 \n",
"248 2020-07-16 Baylor Texas 48023 1 0 3509 \n",
"\n",
" deaths_per_million cases_per_million \n",
"0 716 44126 \n",
"1 383 34438 \n",
"2 69 33878 \n",
"3 183 31450 \n",
"4 0 30274 \n",
".. ... ... \n",
"244 0 1090 \n",
"245 0 865 \n",
"246 337 674 \n",
"247 0 611 \n",
"248 0 284 \n",
"\n",
"[249 rows x 9 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pandas.read_csv(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment