Last active
February 16, 2019 11:25
-
-
Save QuantumDamage/01f66d55caca2aade3627ffe7505e593 to your computer and use it in GitHub Desktop.
Jakość powietrza w Polsce #4 – zróbmy sobie własną bazę SQLite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import requests\nimport pandas as pd\nimport sqlite3\nfrom tqdm import tqdm", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 4\nr = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/findAll')\nstations = pd.io.json.json_normalize(r.json())", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"scrolled": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "print(stations.shape)", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "(178, 10)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# you have to create path to db manually\nconn = sqlite3.connect('../output/aqip.db')\nc = conn.cursor()", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 1\n# stations\ncreate_query = '''\nCREATE TABLE IF NOT EXISTS stations (\nid integer PRIMARY KEY, \naddressStreet text, \ncity_commune_communeName text,\ncity_commune_districtName text,\ncity_commune_provinceName text,\ncity_id integer,\ncity_name text,\ngegrLat real,\ngegrLon real,\nstationName text)\n'''\nc.execute(create_query)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "<sqlite3.Cursor at 0x7facfdc0eab0>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 2\n# sensors\ncreate_query = '''\nCREATE TABLE IF NOT EXISTS sensors (\nid integer PRIMARY KEY,\nparam_idParam integer,\nparam_paramCode text,\nparam_paramFormula text,\nparam_paramName text, \nstationId integer)\n'''\nc.execute(create_query)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "<sqlite3.Cursor at 0x7facfdc0eab0>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 3\n# readings\ncreate_query = '''\nCREATE TABLE IF NOT EXISTS readings (\nid integer,\ndatetime text,\nvalue real,\nPRIMARY KEY (id, datetime))\n'''\nc.execute(create_query)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "<sqlite3.Cursor at 0x7facfdc0eab0>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 5\nfor index, station in stations.iterrows():\n c.execute(\"INSERT OR REPLACE INTO stations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n ([station[\"id\"], \n station[\"addressStreet\"],\n station[\"city.commune.communeName\"],\n station[\"city.commune.districtName\"],\n station[\"city.commune.provinceName\"], \n station[\"city.id\"], \n station[\"city.name\"], \n station[\"gegrLat\"], \n station[\"gegrLon\"], \n station[\"stationName\"]]))\nconn.commit()", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 6\nfor station_id in tqdm(pd.read_sql_query(\"select id from stations\", conn).values):\n station_id = station_id[0]\n #print(station_id)\n r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/sensors/' + str(station_id))\n sensors = pd.io.json.json_normalize(r.json())\n\n for index, sensor in sensors.iterrows():\n #print(sensor)\n c.execute(\"INSERT OR REPLACE INTO sensors VALUES (?, ?, ?, ?, ?, ?)\", \n ([sensor[\"id\"], \n sensor[\"param.idParam\"],\n sensor[\"param.paramCode\"],\n sensor[\"param.paramFormula\"],\n sensor[\"param.paramName\"], \n sensor[\"stationId\"]]))\n conn.commit()\n #break", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "100%|██████████| 179/179 [00:46<00:00, 3.85it/s]\n", | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Kod 7\nlengths = []\nfor sensor_id in tqdm(pd.read_sql_query(\"select id from sensors\", conn).values):\n sensor_id = sensor_id[0]\n #print(sensor_id)\n r = requests.get('http://api.gios.gov.pl/pjp-api/rest/data/getData/' + str(sensor_id))\n readings = pd.io.json.json_normalize(r.json())\n \n readingsFrame = pd.DataFrame()\n \n readingsFrame[\"datetime\"] = [d[u'date'] for d in readings[\"values\"].values.item()]\n readingsFrame[\"value\"] = [d[u'value'] for d in readings[\"values\"].values.item()]\n \n length = len(readingsFrame)\n lengths.append(length)\n #if length == 0:\n # print(sensor_id)\n \n for index, reading in readingsFrame.iterrows():\n #print(reading[\"datetime\"])\n #print(reading[\"value\"])\n \n c.execute(\"INSERT OR REPLACE INTO readings VALUES (?, ?, ?)\", ([int(sensor_id), \n reading[\"datetime\"],\n reading[\"value\"],\n ]))\n conn.commit()\n #break", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "100%|██████████| 715/715 [04:22<00:00, 2.73it/s]\n", | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "pd.Series(lengths).value_counts()", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 11, | |
"data": { | |
"text/plain": "60 693\n0 21\n61 1\ndtype: int64" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Reading data from database" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%%time\n# Kod 8\n\n\nquery = \"\"\"\nSELECT readings.id as sensor_id, datetime, value, param_paramFormula, gegrLat, gegrLon, stationId as station_id\nFROM readings, sensors, stations\nWHERE readings.id = sensors.id\n AND sensors.stationId = stations.id\n\"\"\"\n\ngios_data = pd.read_sql_query(query, conn)\n\ngios_data.sort_values(by = [\"sensor_id\", \"datetime\"], inplace=True)", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "CPU times: user 469 ms, sys: 21.2 ms, total: 490 ms\nWall time: 534 ms\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "gios_data.head()", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 14, | |
"data": { | |
"text/plain": " sensor_id datetime value param_paramFormula gegrLat \\\n23 88 2019-02-13 01:00:00 15.0055 NO2 50.972167 \n22 88 2019-02-13 02:00:00 11.3674 NO2 50.972167 \n21 88 2019-02-13 03:00:00 10.1511 NO2 50.972167 \n20 88 2019-02-13 04:00:00 10.5415 NO2 50.972167 \n19 88 2019-02-13 05:00:00 12.2634 NO2 50.972167 \n\n gegrLon station_id \n23 14.941319 14 \n22 14.941319 14 \n21 14.941319 14 \n20 14.941319 14 \n19 14.941319 14 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sensor_id</th>\n <th>datetime</th>\n <th>value</th>\n <th>param_paramFormula</th>\n <th>gegrLat</th>\n <th>gegrLon</th>\n <th>station_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>23</th>\n <td>88</td>\n <td>2019-02-13 01:00:00</td>\n <td>15.0055</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>22</th>\n <td>88</td>\n <td>2019-02-13 02:00:00</td>\n <td>11.3674</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>21</th>\n <td>88</td>\n <td>2019-02-13 03:00:00</td>\n <td>10.1511</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>20</th>\n <td>88</td>\n <td>2019-02-13 04:00:00</td>\n <td>10.5415</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>19</th>\n <td>88</td>\n <td>2019-02-13 05:00:00</td>\n <td>12.2634</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "gios_data.tail()", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 15, | |
"data": { | |
"text/plain": " sensor_id datetime value param_paramFormula gegrLat \\\n58240 20916 2019-02-16 08:00:00 133.9090 PM2.5 51.808662 \n58239 20916 2019-02-16 09:00:00 164.0960 PM2.5 51.808662 \n58238 20916 2019-02-16 10:00:00 120.0040 PM2.5 51.808662 \n58237 20916 2019-02-16 11:00:00 65.6273 PM2.5 51.808662 \n58236 20916 2019-02-16 12:00:00 25.7272 PM2.5 51.808662 \n\n gegrLon station_id \n58240 15.708193 11295 \n58239 15.708193 11295 \n58238 15.708193 11295 \n58237 15.708193 11295 \n58236 15.708193 11295 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sensor_id</th>\n <th>datetime</th>\n <th>value</th>\n <th>param_paramFormula</th>\n <th>gegrLat</th>\n <th>gegrLon</th>\n <th>station_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>58240</th>\n <td>20916</td>\n <td>2019-02-16 08:00:00</td>\n <td>133.9090</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58239</th>\n <td>20916</td>\n <td>2019-02-16 09:00:00</td>\n <td>164.0960</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58238</th>\n <td>20916</td>\n <td>2019-02-16 10:00:00</td>\n <td>120.0040</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58237</th>\n <td>20916</td>\n <td>2019-02-16 11:00:00</td>\n <td>65.6273</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58236</th>\n <td>20916</td>\n <td>2019-02-16 12:00:00</td>\n <td>25.7272</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/01f66d55caca2aade3627ffe7505e593" | |
}, | |
"gist": { | |
"id": "01f66d55caca2aade3627ffe7505e593", | |
"data": { | |
"description": "Jakość powietrza w Polsce #4 – zróbmy sobie własną bazę SQLite", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "conda-env-jakbadacdane.pl-py", | |
"display_name": "Python [conda env:jakbadacdane.pl]", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.7", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment