Skip to content

Instantly share code, notes, and snippets.

@michellemho
Created June 26, 2018 21:47
Show Gist options
  • Save michellemho/2713b4c93aa6ea7df0ca995728fe35ba to your computer and use it in GitHub Desktop.
Save michellemho/2713b4c93aa6ea7df0ca995728fe35ba to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Downloading and uploading Waze Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import cartoframes\n",
"import requests\n",
"import re\n",
"import csv\n",
"from io import StringIO\n",
"import requests\n",
"\n",
"api_key = '****'\n",
"username = 'traffico-geographica'\n",
"download_file = 'nyc_waze_data_jams.csv'\n",
"q = \"COPY nyc_waze_data_jams (blockingalert_uuid, city, country, date, delay, endnode, georss_date, length, level, roadtype, speed, startnode, street, the_geom, turntype, type, uuid) TO stdout WITH (FORMAT csv, HEADER true)\"\n",
"\n",
"\n",
"# request the download, specifying desired file name\n",
"url = \"http://%s.carto.com/api/v2/sql/copyto\" % username\n",
"r = requests.get(url, params={'api_key': api_key, 'q': q, 'filename': download_file}, stream=True)\n",
"r.raise_for_status()\n",
"\n",
"# read save file name from response headers\n",
"d = r.headers['content-disposition']\n",
"savefilename = re.findall(\"filename=(.+)\", d)\n",
"\n",
"if len(savefilename) > 0:\n",
" with open(savefilename[0], 'wb') as handle:\n",
" for block in r.iter_content(1024):\n",
" handle.write(block)\n",
" print(\"Downloaded to: %s\" % savefilename)\n",
"else:\n",
" print(\"Error: could not find read file name from headers\")"
]
},
{
"cell_type": "code",
"execution_count": 447,
"metadata": {},
"outputs": [],
"source": [
"# Create table in account:\n",
"# I renamed the_geom to geometry to avoid potential confusion...\n",
"\n",
"create_query = '''\n",
"CREATE TABLE nyc_waze_data_jams (\n",
" blockingalert_uuid text,\n",
" city text,\n",
" country text,\n",
" date timestamp,\n",
" delay integer,\n",
" endnode text, \n",
" georss_date timestamp,\n",
" length real,\n",
" level integer,\n",
" roadtype integer,\n",
" speed real,\n",
" startnode text,\n",
" street text,\n",
" geometry text,\n",
" turntype text,\n",
" type text,\n",
" uuid text\n",
");\n",
"\n",
"-- adds the 'cartodb_id' and 'the_geom_webmercator'\n",
"-- adds the required triggers and indexes\n",
"SELECT CDB_CartodbfyTable('michellemho-carto','nyc_waze_data_jams');\n",
"\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 448,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"cc = cartoframes.CartoContext(base_url='https://{username}.carto.com'.format(username='michellemho-carto'), api_key='29a2786e7462e8cdab49674be25159fce8939048')"
]
},
{
"cell_type": "code",
"execution_count": 449,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cdb_cartodbfytable</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nyc_waze_data_jams</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cdb_cartodbfytable\n",
"0 nyc_waze_data_jams"
]
},
"execution_count": 449,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cc.query(create_query)"
]
},
{
"cell_type": "code",
"execution_count": 446,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"execution_count": 446,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run only if you need to re-do the table creation!\n",
"# cc.query('DROP TABLE nyc_waze_data_jams')"
]
},
{
"cell_type": "code",
"execution_count": 450,
"metadata": {},
"outputs": [],
"source": [
"# Upload to account\n",
"\n",
"api_key = '****'\n",
"username = 'michellemho-carto'\n",
"upload_file = 'nyc_waze_data_jams.csv'\n",
"q = '''COPY nyc_waze_data_jams (blockingalert_uuid,city,country,date,delay,endnode,georss_date,length,level,roadtype,speed,startnode,street,geometry,turntype,type,uuid) FROM STDIN WITH (FORMAT csv, HEADER true)'''"
]
},
{
"cell_type": "code",
"execution_count": 454,
"metadata": {},
"outputs": [],
"source": [
"url = \"http://%s.carto.com/api/v2/sql/copyfrom\" % 'michellemho-carto'"
]
},
{
"cell_type": "code",
"execution_count": 456,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success: 50000 rows imported\n",
"Success: 50000 rows imported\n",
"Success: 10961 rows imported\n"
]
}
],
"source": [
"def do_something(batch):\n",
" r = requests.post(url, params={'api_key': api_key, 'q': q}, data=batch, stream=True)\n",
"\n",
" if r.status_code != 200:\n",
" print(r.text)\n",
" else:\n",
" status = r.json()\n",
" print(\"Success: %s rows imported\" % status['total_rows'])\n",
"\n",
"with open(upload_file, 'r') as csvfile:\n",
" reader = csv.reader(csvfile)\n",
" header = next(reader) # skip header\n",
"\n",
" batch_size = 50000\n",
" batch = ''\n",
" count = 0\n",
"\n",
" for row in reader:\n",
" if count >= batch_size:\n",
"# batch += '''\"'''\n",
"# print(batch)\n",
" temp_stream = StringIO(batch)\n",
" do_something(temp_stream)\n",
" batch = ''\n",
" count = 0\n",
" batch += '\\n' +'''\"''' + '\", \"'.join(row) + '''\"'''\n",
" count += 1\n",
" if batch:\n",
" temp_stream = StringIO(batch)\n",
" do_something(temp_stream)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment