Skip to content

Instantly share code, notes, and snippets.

@QuantumDamage
Last active February 16, 2019 11:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save QuantumDamage/01f66d55caca2aade3627ffe7505e593 to your computer and use it in GitHub Desktop.
Save QuantumDamage/01f66d55caca2aade3627ffe7505e593 to your computer and use it in GitHub Desktop.
Jakość powietrza w Polsce #4 – zróbmy sobie własną bazę SQLite
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import requests\nimport pandas as pd\nimport sqlite3\nfrom tqdm import tqdm",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 4\nr = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/findAll')\nstations = pd.io.json.json_normalize(r.json())",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"scrolled": true,
"trusted": true
},
"cell_type": "code",
"source": "print(stations.shape)",
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": "(178, 10)\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# you have to create path to db manually\nconn = sqlite3.connect('../output/aqip.db')\nc = conn.cursor()",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 1\n# stations\ncreate_query = '''\nCREATE TABLE IF NOT EXISTS stations (\nid integer PRIMARY KEY, \naddressStreet text, \ncity_commune_communeName text,\ncity_commune_districtName text,\ncity_commune_provinceName text,\ncity_id integer,\ncity_name text,\ngegrLat real,\ngegrLon real,\nstationName text)\n'''\nc.execute(create_query)",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "<sqlite3.Cursor at 0x7facfdc0eab0>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 2\n# sensors\ncreate_query = '''\nCREATE TABLE IF NOT EXISTS sensors (\nid integer PRIMARY KEY,\nparam_idParam integer,\nparam_paramCode text,\nparam_paramFormula text,\nparam_paramName text, \nstationId integer)\n'''\nc.execute(create_query)",
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 6,
"data": {
"text/plain": "<sqlite3.Cursor at 0x7facfdc0eab0>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 3\n# readings\ncreate_query = '''\nCREATE TABLE IF NOT EXISTS readings (\nid integer,\ndatetime text,\nvalue real,\nPRIMARY KEY (id, datetime))\n'''\nc.execute(create_query)",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": "<sqlite3.Cursor at 0x7facfdc0eab0>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 5\nfor index, station in stations.iterrows():\n c.execute(\"INSERT OR REPLACE INTO stations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n ([station[\"id\"], \n station[\"addressStreet\"],\n station[\"city.commune.communeName\"],\n station[\"city.commune.districtName\"],\n station[\"city.commune.provinceName\"], \n station[\"city.id\"], \n station[\"city.name\"], \n station[\"gegrLat\"], \n station[\"gegrLon\"], \n station[\"stationName\"]]))\nconn.commit()",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 6\nfor station_id in tqdm(pd.read_sql_query(\"select id from stations\", conn).values):\n station_id = station_id[0]\n #print(station_id)\n r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/sensors/' + str(station_id))\n sensors = pd.io.json.json_normalize(r.json())\n\n for index, sensor in sensors.iterrows():\n #print(sensor)\n c.execute(\"INSERT OR REPLACE INTO sensors VALUES (?, ?, ?, ?, ?, ?)\", \n ([sensor[\"id\"], \n sensor[\"param.idParam\"],\n sensor[\"param.paramCode\"],\n sensor[\"param.paramFormula\"],\n sensor[\"param.paramName\"], \n sensor[\"stationId\"]]))\n conn.commit()\n #break",
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": "100%|██████████| 179/179 [00:46<00:00, 3.85it/s]\n",
"name": "stderr"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Kod 7\nlengths = []\nfor sensor_id in tqdm(pd.read_sql_query(\"select id from sensors\", conn).values):\n sensor_id = sensor_id[0]\n #print(sensor_id)\n r = requests.get('http://api.gios.gov.pl/pjp-api/rest/data/getData/' + str(sensor_id))\n readings = pd.io.json.json_normalize(r.json())\n \n readingsFrame = pd.DataFrame()\n \n readingsFrame[\"datetime\"] = [d[u'date'] for d in readings[\"values\"].values.item()]\n readingsFrame[\"value\"] = [d[u'value'] for d in readings[\"values\"].values.item()]\n \n length = len(readingsFrame)\n lengths.append(length)\n #if length == 0:\n # print(sensor_id)\n \n for index, reading in readingsFrame.iterrows():\n #print(reading[\"datetime\"])\n #print(reading[\"value\"])\n \n c.execute(\"INSERT OR REPLACE INTO readings VALUES (?, ?, ?)\", ([int(sensor_id), \n reading[\"datetime\"],\n reading[\"value\"],\n ]))\n conn.commit()\n #break",
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": "100%|██████████| 715/715 [04:22<00:00, 2.73it/s]\n",
"name": "stderr"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "pd.Series(lengths).value_counts()",
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 11,
"data": {
"text/plain": "60 693\n0 21\n61 1\ndtype: int64"
},
"metadata": {}
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Reading data from database"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%%time\n# Kod 8\n\n\nquery = \"\"\"\nSELECT readings.id as sensor_id, datetime, value, param_paramFormula, gegrLat, gegrLon, stationId as station_id\nFROM readings, sensors, stations\nWHERE readings.id = sensors.id\n AND sensors.stationId = stations.id\n\"\"\"\n\ngios_data = pd.read_sql_query(query, conn)\n\ngios_data.sort_values(by = [\"sensor_id\", \"datetime\"], inplace=True)",
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": "CPU times: user 469 ms, sys: 21.2 ms, total: 490 ms\nWall time: 534 ms\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "gios_data.head()",
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 14,
"data": {
"text/plain": " sensor_id datetime value param_paramFormula gegrLat \\\n23 88 2019-02-13 01:00:00 15.0055 NO2 50.972167 \n22 88 2019-02-13 02:00:00 11.3674 NO2 50.972167 \n21 88 2019-02-13 03:00:00 10.1511 NO2 50.972167 \n20 88 2019-02-13 04:00:00 10.5415 NO2 50.972167 \n19 88 2019-02-13 05:00:00 12.2634 NO2 50.972167 \n\n gegrLon station_id \n23 14.941319 14 \n22 14.941319 14 \n21 14.941319 14 \n20 14.941319 14 \n19 14.941319 14 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sensor_id</th>\n <th>datetime</th>\n <th>value</th>\n <th>param_paramFormula</th>\n <th>gegrLat</th>\n <th>gegrLon</th>\n <th>station_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>23</th>\n <td>88</td>\n <td>2019-02-13 01:00:00</td>\n <td>15.0055</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>22</th>\n <td>88</td>\n <td>2019-02-13 02:00:00</td>\n <td>11.3674</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>21</th>\n <td>88</td>\n <td>2019-02-13 03:00:00</td>\n <td>10.1511</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>20</th>\n <td>88</td>\n <td>2019-02-13 04:00:00</td>\n <td>10.5415</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n <tr>\n <th>19</th>\n <td>88</td>\n <td>2019-02-13 05:00:00</td>\n <td>12.2634</td>\n <td>NO2</td>\n <td>50.972167</td>\n <td>14.941319</td>\n <td>14</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "gios_data.tail()",
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 15,
"data": {
"text/plain": " sensor_id datetime value param_paramFormula gegrLat \\\n58240 20916 2019-02-16 08:00:00 133.9090 PM2.5 51.808662 \n58239 20916 2019-02-16 09:00:00 164.0960 PM2.5 51.808662 \n58238 20916 2019-02-16 10:00:00 120.0040 PM2.5 51.808662 \n58237 20916 2019-02-16 11:00:00 65.6273 PM2.5 51.808662 \n58236 20916 2019-02-16 12:00:00 25.7272 PM2.5 51.808662 \n\n gegrLon station_id \n58240 15.708193 11295 \n58239 15.708193 11295 \n58238 15.708193 11295 \n58237 15.708193 11295 \n58236 15.708193 11295 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sensor_id</th>\n <th>datetime</th>\n <th>value</th>\n <th>param_paramFormula</th>\n <th>gegrLat</th>\n <th>gegrLon</th>\n <th>station_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>58240</th>\n <td>20916</td>\n <td>2019-02-16 08:00:00</td>\n <td>133.9090</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58239</th>\n <td>20916</td>\n <td>2019-02-16 09:00:00</td>\n <td>164.0960</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58238</th>\n <td>20916</td>\n <td>2019-02-16 10:00:00</td>\n <td>120.0040</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58237</th>\n <td>20916</td>\n <td>2019-02-16 11:00:00</td>\n <td>65.6273</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n <tr>\n <th>58236</th>\n <td>20916</td>\n <td>2019-02-16 12:00:00</td>\n <td>25.7272</td>\n <td>PM2.5</td>\n <td>51.808662</td>\n <td>15.708193</td>\n <td>11295</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/01f66d55caca2aade3627ffe7505e593"
},
"gist": {
"id": "01f66d55caca2aade3627ffe7505e593",
"data": {
"description": "Jakość powietrza w Polsce #4 – zróbmy sobie własną bazę SQLite",
"public": true
}
},
"kernelspec": {
"name": "conda-env-jakbadacdane.pl-py",
"display_name": "Python [conda env:jakbadacdane.pl]",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.7",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment