Created
June 26, 2018 21:47
-
-
Save michellemho/2713b4c93aa6ea7df0ca995728fe35ba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Downloading and uploading Waze Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import cartoframes\n", | |
"import requests\n", | |
"import re\n", | |
"import csv\n", | |
"from io import StringIO\n", | |
"import requests\n", | |
"\n", | |
"api_key = '****'\n", | |
"username = 'traffico-geographica'\n", | |
"download_file = 'nyc_waze_data_jams.csv'\n", | |
"q = \"COPY nyc_waze_data_jams (blockingalert_uuid, city, country, date, delay, endnode, georss_date, length, level, roadtype, speed, startnode, street, the_geom, turntype, type, uuid) TO stdout WITH (FORMAT csv, HEADER true)\"\n", | |
"\n", | |
"\n", | |
"# request the download, specifying desired file name\n", | |
"url = \"http://%s.carto.com/api/v2/sql/copyto\" % username\n", | |
"r = requests.get(url, params={'api_key': api_key, 'q': q, 'filename': download_file}, stream=True)\n", | |
"r.raise_for_status()\n", | |
"\n", | |
"# read save file name from response headers\n", | |
"d = r.headers['content-disposition']\n", | |
"savefilename = re.findall(\"filename=(.+)\", d)\n", | |
"\n", | |
"if len(savefilename) > 0:\n", | |
" with open(savefilename[0], 'wb') as handle:\n", | |
" for block in r.iter_content(1024):\n", | |
" handle.write(block)\n", | |
" print(\"Downloaded to: %s\" % savefilename)\n", | |
"else:\n", | |
" print(\"Error: could not find read file name from headers\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 447, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create table in account:\n", | |
"# I renamed the_geom to geometry to avoid potential confusion...\n", | |
"\n", | |
"create_query = '''\n", | |
"CREATE TABLE nyc_waze_data_jams (\n", | |
" blockingalert_uuid text,\n", | |
" city text,\n", | |
" country text,\n", | |
" date timestamp,\n", | |
" delay integer,\n", | |
" endnode text, \n", | |
" georss_date timestamp,\n", | |
" length real,\n", | |
" level integer,\n", | |
" roadtype integer,\n", | |
" speed real,\n", | |
" startnode text,\n", | |
" street text,\n", | |
" geometry text,\n", | |
" turntype text,\n", | |
" type text,\n", | |
" uuid text\n", | |
");\n", | |
"\n", | |
"-- adds the 'cartodb_id' and 'the_geom_webmercator'\n", | |
"-- adds the required triggers and indexes\n", | |
"SELECT CDB_CartodbfyTable('michellemho-carto','nyc_waze_data_jams');\n", | |
"\n", | |
"'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 448, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"cc = cartoframes.CartoContext(base_url='https://{username}.carto.com'.format(username='michellemho-carto'), api_key='29a2786e7462e8cdab49674be25159fce8939048')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 449, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cdb_cartodbfytable</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>nyc_waze_data_jams</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cdb_cartodbfytable\n", | |
"0 nyc_waze_data_jams" | |
] | |
}, | |
"execution_count": 449, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cc.query(create_query)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 446, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: []\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 446, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Run only if you need to re-do the table creation!\n", | |
"# cc.query('DROP TABLE nyc_waze_data_jams')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 450, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Upload to account\n", | |
"\n", | |
"api_key = '****'\n", | |
"username = 'michellemho-carto'\n", | |
"upload_file = 'nyc_waze_data_jams.csv'\n", | |
"q = '''COPY nyc_waze_data_jams (blockingalert_uuid,city,country,date,delay,endnode,georss_date,length,level,roadtype,speed,startnode,street,geometry,turntype,type,uuid) FROM STDIN WITH (FORMAT csv, HEADER true)'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 454, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"http://%s.carto.com/api/v2/sql/copyfrom\" % 'michellemho-carto'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 456, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Success: 50000 rows imported\n", | |
"Success: 50000 rows imported\n", | |
"Success: 10961 rows imported\n" | |
] | |
} | |
], | |
"source": [ | |
"def do_something(batch):\n", | |
" r = requests.post(url, params={'api_key': api_key, 'q': q}, data=batch, stream=True)\n", | |
"\n", | |
" if r.status_code != 200:\n", | |
" print(r.text)\n", | |
" else:\n", | |
" status = r.json()\n", | |
" print(\"Success: %s rows imported\" % status['total_rows'])\n", | |
"\n", | |
"with open(upload_file, 'r') as csvfile:\n", | |
" reader = csv.reader(csvfile)\n", | |
" header = next(reader) # skip header\n", | |
"\n", | |
" batch_size = 50000\n", | |
" batch = ''\n", | |
" count = 0\n", | |
"\n", | |
" for row in reader:\n", | |
" if count >= batch_size:\n", | |
"# batch += '''\"'''\n", | |
"# print(batch)\n", | |
" temp_stream = StringIO(batch)\n", | |
" do_something(temp_stream)\n", | |
" batch = ''\n", | |
" count = 0\n", | |
" batch += '\\n' +'''\"''' + '\", \"'.join(row) + '''\"'''\n", | |
" count += 1\n", | |
" if batch:\n", | |
" temp_stream = StringIO(batch)\n", | |
" do_something(temp_stream)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment