michellemho/Batch COPY.ipynb

## Batch COPY.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Downloading and uploading Waze Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import cartoframes\n",
    "import requests\n",
    "import re\n",
    "import csv\n",
    "from io import StringIO\n",
    "import requests\n",
    "\n",
    "api_key = '****'\n",
    "username = 'traffico-geographica'\n",
    "download_file = 'nyc_waze_data_jams.csv'\n",
    "q = \"COPY nyc_waze_data_jams (blockingalert_uuid, city, country, date, delay, endnode, georss_date, length, level, roadtype, speed, startnode, street, the_geom, turntype, type, uuid) TO stdout WITH (FORMAT csv, HEADER true)\"\n",
    "\n",
    "\n",
    "# request the download, specifying desired file name\n",
    "url = \"http://%s.carto.com/api/v2/sql/copyto\" % username\n",
    "r = requests.get(url, params={'api_key': api_key, 'q': q, 'filename': download_file}, stream=True)\n",
    "r.raise_for_status()\n",
    "\n",
    "# read save file name from response headers\n",
    "d = r.headers['content-disposition']\n",
    "savefilename = re.findall(\"filename=(.+)\", d)\n",
    "\n",
    "if len(savefilename) > 0:\n",
    "    with open(savefilename[0], 'wb') as handle:\n",
    "        for block in r.iter_content(1024):\n",
    "            handle.write(block)\n",
    "    print(\"Downloaded to: %s\" % savefilename)\n",
    "else:\n",
    "    print(\"Error: could not find read file name from headers\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 447,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create table in account:\n",
    "# I renamed the_geom to geometry to avoid potential confusion...\n",
    "\n",
    "create_query = '''\n",
    "CREATE TABLE nyc_waze_data_jams (\n",
    "    blockingalert_uuid text,\n",
    "    city text,\n",
    "    country text,\n",
    "    date timestamp,\n",
    "    delay integer,\n",
    "    endnode text, \n",
    "    georss_date timestamp,\n",
    "    length real,\n",
    "    level integer,\n",
    "    roadtype integer,\n",
    "    speed real,\n",
    "    startnode text,\n",
    "    street text,\n",
    "    geometry text,\n",
    "    turntype text,\n",
    "    type text,\n",
    "    uuid text\n",
    ");\n",
    "\n",
    "-- adds the 'cartodb_id' and 'the_geom_webmercator'\n",
    "-- adds the required triggers and indexes\n",
    "SELECT CDB_CartodbfyTable('michellemho-carto','nyc_waze_data_jams');\n",
    "\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 448,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "cc = cartoframes.CartoContext(base_url='https://{username}.carto.com'.format(username='michellemho-carto'), api_key='29a2786e7462e8cdab49674be25159fce8939048')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 449,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cdb_cartodbfytable</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>nyc_waze_data_jams</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   cdb_cartodbfytable\n",
       "0  nyc_waze_data_jams"
      ]
     },
     "execution_count": 449,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cc.query(create_query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 446,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: []"
      ]
     },
     "execution_count": 446,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run only if you need to re-do the table creation!\n",
    "# cc.query('DROP TABLE nyc_waze_data_jams')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 450,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload to account\n",
    "\n",
    "api_key = '****'\n",
    "username = 'michellemho-carto'\n",
    "upload_file = 'nyc_waze_data_jams.csv'\n",
    "q = '''COPY nyc_waze_data_jams (blockingalert_uuid,city,country,date,delay,endnode,georss_date,length,level,roadtype,speed,startnode,street,geometry,turntype,type,uuid) FROM STDIN WITH (FORMAT csv, HEADER true)'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 454,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"http://%s.carto.com/api/v2/sql/copyfrom\" % 'michellemho-carto'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 456,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Success: 50000 rows imported\n",
      "Success: 50000 rows imported\n",
      "Success: 10961 rows imported\n"
     ]
    }
   ],
   "source": [
    "def do_something(batch):\n",
    "    r = requests.post(url, params={'api_key': api_key, 'q': q}, data=batch, stream=True)\n",
    "\n",
    "    if r.status_code != 200:\n",
    "        print(r.text)\n",
    "    else:\n",
    "        status = r.json()\n",
    "        print(\"Success: %s rows imported\" % status['total_rows'])\n",
    "\n",
    "with open(upload_file, 'r') as csvfile:\n",
    "    reader = csv.reader(csvfile)\n",
    "    header = next(reader) # skip header\n",
    "\n",
    "    batch_size = 50000\n",
    "    batch = ''\n",
    "    count = 0\n",
    "\n",
    "    for row in reader:\n",
    "        if count >= batch_size:\n",
    "#             batch += '''\"'''\n",
    "#             print(batch)\n",
    "            temp_stream = StringIO(batch)\n",
    "            do_something(temp_stream)\n",
    "            batch = ''\n",
    "            count = 0\n",
    "        batch +=  '\\n' +'''\"''' + '\", \"'.join(row) + '''\"'''\n",
    "        count += 1\n",
    "    if batch:\n",
    "        temp_stream = StringIO(batch)\n",
    "        do_something(temp_stream)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Downloading and uploading Waze Data"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"import cartoframes\n",
	"import requests\n",
	"import re\n",
	"import csv\n",
	"from io import StringIO\n",
	"import requests\n",
	"\n",
	"api_key = '****'\n",
	"username = 'traffico-geographica'\n",
	"download_file = 'nyc_waze_data_jams.csv'\n",
	"q = \"COPY nyc_waze_data_jams (blockingalert_uuid, city, country, date, delay, endnode, georss_date, length, level, roadtype, speed, startnode, street, the_geom, turntype, type, uuid) TO stdout WITH (FORMAT csv, HEADER true)\"\n",
	"\n",
	"\n",
	"# request the download, specifying desired file name\n",
	"url = \"http://%s.carto.com/api/v2/sql/copyto\" % username\n",
	"r = requests.get(url, params={'api_key': api_key, 'q': q, 'filename': download_file}, stream=True)\n",
	"r.raise_for_status()\n",
	"\n",
	"# read save file name from response headers\n",
	"d = r.headers['content-disposition']\n",
	"savefilename = re.findall(\"filename=(.+)\", d)\n",
	"\n",
	"if len(savefilename) > 0:\n",
	" with open(savefilename[0], 'wb') as handle:\n",
	" for block in r.iter_content(1024):\n",
	" handle.write(block)\n",
	" print(\"Downloaded to: %s\" % savefilename)\n",
	"else:\n",
	" print(\"Error: could not find read file name from headers\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 447,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Create table in account:\n",
	"# I renamed the_geom to geometry to avoid potential confusion...\n",
	"\n",
	"create_query = '''\n",
	"CREATE TABLE nyc_waze_data_jams (\n",
	" blockingalert_uuid text,\n",
	" city text,\n",
	" country text,\n",
	" date timestamp,\n",
	" delay integer,\n",
	" endnode text, \n",
	" georss_date timestamp,\n",
	" length real,\n",
	" level integer,\n",
	" roadtype integer,\n",
	" speed real,\n",
	" startnode text,\n",
	" street text,\n",
	" geometry text,\n",
	" turntype text,\n",
	" type text,\n",
	" uuid text\n",
	");\n",
	"\n",
	"-- adds the 'cartodb_id' and 'the_geom_webmercator'\n",
	"-- adds the required triggers and indexes\n",
	"SELECT CDB_CartodbfyTable('michellemho-carto','nyc_waze_data_jams');\n",
	"\n",
	"'''"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 448,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"cc = cartoframes.CartoContext(base_url='https://{username}.carto.com'.format(username='michellemho-carto'), api_key='29a2786e7462e8cdab49674be25159fce8939048')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 449,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>cdb_cartodbfytable</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>nyc_waze_data_jams</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" cdb_cartodbfytable\n",
	"0 nyc_waze_data_jams"
	]
	},
	"execution_count": 449,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"cc.query(create_query)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 446,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	"Empty DataFrame\n",
	"Columns: []\n",
	"Index: []"
	]
	},
	"execution_count": 446,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Run only if you need to re-do the table creation!\n",
	"# cc.query('DROP TABLE nyc_waze_data_jams')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 450,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Upload to account\n",
	"\n",
	"api_key = '****'\n",
	"username = 'michellemho-carto'\n",
	"upload_file = 'nyc_waze_data_jams.csv'\n",
	"q = '''COPY nyc_waze_data_jams (blockingalert_uuid,city,country,date,delay,endnode,georss_date,length,level,roadtype,speed,startnode,street,geometry,turntype,type,uuid) FROM STDIN WITH (FORMAT csv, HEADER true)'''"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 454,
	"metadata": {},
	"outputs": [],
	"source": [
	"url = \"http://%s.carto.com/api/v2/sql/copyfrom\" % 'michellemho-carto'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 456,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Success: 50000 rows imported\n",
	"Success: 50000 rows imported\n",
	"Success: 10961 rows imported\n"
	]
	}
	],
	"source": [
	"def do_something(batch):\n",
	" r = requests.post(url, params={'api_key': api_key, 'q': q}, data=batch, stream=True)\n",
	"\n",
	" if r.status_code != 200:\n",
	" print(r.text)\n",
	" else:\n",
	" status = r.json()\n",
	" print(\"Success: %s rows imported\" % status['total_rows'])\n",
	"\n",
	"with open(upload_file, 'r') as csvfile:\n",
	" reader = csv.reader(csvfile)\n",
	" header = next(reader) # skip header\n",
	"\n",
	" batch_size = 50000\n",
	" batch = ''\n",
	" count = 0\n",
	"\n",
	" for row in reader:\n",
	" if count >= batch_size:\n",
	"# batch += '''\"'''\n",
	"# print(batch)\n",
	" temp_stream = StringIO(batch)\n",
	" do_something(temp_stream)\n",
	" batch = ''\n",
	" count = 0\n",
	" batch += '\\n' +'''\"''' + '\", \"'.join(row) + '''\"'''\n",
	" count += 1\n",
	" if batch:\n",
	" temp_stream = StringIO(batch)\n",
	" do_something(temp_stream)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}