phenders/sdk_test.ipynb

## sdk_test.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "import json\n",
    "import os\n",
    "import time\n",
    "\n",
    "import enigma\n",
    "import pandas as pd\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the Enigma Public SDK client; the 'Using the SDK' cells call it as public\n",
    "public = enigma.Public()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the API key from the ENIGMA_API_KEY environment variable so that no\n",
    "# credential is stored in the notebook; a KeyError here means the variable is unset\n",
    "api_key = os.environ['ENIGMA_API_KEY']\n",
    "# Authenticate the SDK client\n",
    "public.set_auth(apikey=api_key)\n",
    "# Bearer header for the raw API requests that pass headers=headers\n",
    "headers = {'authorization': 'Bearer {}'.format(api_key)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /collections/\n",
    "#### Get all top level collections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ICANN\n",
      "74a03066-c3ab-4887-b040-a2a009be7371\n",
      "United States\n",
      "41026df2-2db0-41b1-8d7f-5a6c8e34de62\n",
      "Governments\n",
      "651a30cd-c864-49ca-8d8b-9418029127db\n",
      "Inbox\n",
      "d774fad5-d5a9-48fa-ac82-70483352739f\n",
      "Enigma Internal\n",
      "e0fac462-cb1c-410d-8331-99c85b08d6d2\n",
      "experiments\n",
      "0de52fad-7e7b-40e1-83c2-3cefd6d7edfc\n",
      "test\n",
      "0ec0eef3-54a4-4a2b-bba1-c9386dbb056c\n",
      "Curated Collections\n",
      "52dfb31c-f22e-49fb-bc05-8f5d8a5e7cab\n",
      "Companies\n",
      "5f8faa60-e6c3-4dc0-8eea-ade8c81d1265\n",
      "Organizations\n",
      "bc5c2c88-687e-4da2-93c3-32237ece39f0\n",
      "Universities\n",
      "c396139e-d685-4311-a447-2dd7669d963a\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: print the name and ID of every top-level collection\n",
    "collections = public.collections.list()\n",
    "for top_level in collections:\n",
    "    print(top_level.display_name, top_level.id, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "United States\n",
      "41026df2-2db0-41b1-8d7f-5a6c8e34de62\n",
      "Governments\n",
      "651a30cd-c864-49ca-8d8b-9418029127db\n",
      "Curated Collections\n",
      "52dfb31c-f22e-49fb-bc05-8f5d8a5e7cab\n",
      "Companies\n",
      "5f8faa60-e6c3-4dc0-8eea-ade8c81d1265\n",
      "Organizations\n",
      "bc5c2c88-687e-4da2-93c3-32237ece39f0\n",
      "Universities\n",
      "c396139e-d685-4311-a447-2dd7669d963a\n"
     ]
    }
   ],
   "source": [
    "# Using the API: the same listing through a plain HTTP request\n",
    "# NOTE(review): no auth header is sent here, unlike the SDK client configured with set_auth --\n",
    "# presumably why fewer collections are printed than by the SDK cell; confirm\n",
    "resp = requests.get('https://public.enigma.com/api/collections/')\n",
    "collections = resp.json()\n",
    "for top_level in collections:\n",
    "    print(top_level['display_name'], top_level['id'], sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get all collections with the specified parent collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "U.S. Federal Government\n",
      "U.S. State Governments\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: list the collections whose parent is the given collection\n",
    "parent_id = '41026df2-2db0-41b1-8d7f-5a6c8e34de62'\n",
    "collections = public.collections.list(parent_collection_id=parent_id)\n",
    "for child in collections:\n",
    "    print(child.display_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "U.S. Federal Government\n",
      "U.S. State Governments\n"
     ]
    }
   ],
   "source": [
    "# Using the API: the parent collection ID is sent as a query parameter\n",
    "params = dict(parent_collection_id='41026df2-2db0-41b1-8d7f-5a6c8e34de62')\n",
    "resp = requests.get('https://public.enigma.com/api/collections/', params=params)\n",
    "collections = resp.json()\n",
    "for child in collections:\n",
    "    print(child['display_name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /collections/id\n",
    "#### Get all child datasets for the specified collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oil & Gas Wells - Alaska\n",
      "Oil & Gas Wells - Arkansas\n",
      "Oil & Gas Wells - California\n",
      "Oil & Gas Wells - New Mexico\n",
      "Oil & Gas Wells - North Dakota\n",
      "Oil & Gas Wells - Oklahoma\n",
      "Oil & Gas Wells - Pennsylvania\n",
      "Oil & Gas Wells - U.S. Offshore\n",
      "Oil & Gas Wells - Utah\n",
      "Oil & Gas Wells - Wyoming\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: fetch one collection, then print the name of each of its child datasets\n",
    "collection_id = 'de8af1e6-f4f0-4762-b5a3-d85d3c3f0967'\n",
    "collection = public.collections.get(collection_id)\n",
    "datasets = collection.child_datasets()\n",
    "for child_dataset in datasets:\n",
    "    print(child_dataset.display_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Not possible using GET /collections/id\n",
    "# Instead, use GET /datasets/?parent_collection_id=<ID>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /datasets/\n",
    "#### Get the first three Enigma Public datasets (by name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100th Congree Amendments\n",
      "100th Congress Basic Information\n",
      "100th Congress Becoming Law\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: slice the first three entries off the dataset listing and print their names\n",
    "all_datasets = public.datasets.list()\n",
    "datasets = all_datasets[0:3]\n",
    "for ds in datasets:\n",
    "    print(ds.display_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100th Congree Amendments\n",
      "100th Congress Basic Information\n",
      "100th Congress Becoming Law\n"
     ]
    }
   ],
   "source": [
    "# Using the API: the Range header asks for resources 0-2, i.e. the first three datasets\n",
    "range_header = {'Range': 'resources=0-2'}\n",
    "resp = requests.get('https://public.enigma.com/api/datasets/', headers=range_header)\n",
    "datasets = resp.json()\n",
    "for ds in datasets:\n",
    "    print(ds['display_name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get the first row of each of the first three datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['hres427-100', 'h610-100', 'h', 'h', '610', '1']]\n",
      "[['hres71-100', 'hres', '71', '100', '1987-02-04', '1']]\n",
      "[['sjres245-100', '100', 'public', '287', '1']]\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: row_limit=1 is passed to the listing, then the rows of each of the\n",
    "# first three datasets' current snapshot are printed\n",
    "datasets = public.datasets.list(row_limit=1)[0:3]\n",
    "for ds in datasets:\n",
    "    snapshot_table = ds.current_snapshot.table_rows\n",
    "    print(snapshot_table.rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['hres427-100', 'h610-100', 'h', 'h', '610', '1']]\n",
      "[['hres71-100', 'hres', '71', '100', '1987-02-04', '1']]\n",
      "[['sjres245-100', '100', 'public', '287', '1']]\n"
     ]
    }
   ],
   "source": [
    "# Using the API: row_limit travels in the query string, the Range header selects resources 0-2\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/datasets/',\n",
    "    params={'row_limit': 1},\n",
    "    headers={'Range': 'resources=0-2'},\n",
    ")\n",
    "datasets = resp.json()\n",
    "for ds in datasets:\n",
    "    print(ds['current_snapshot']['table_rows']['rows'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get the first three datasets containing the specified phrase (words immediately adjacent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PhilGEPS - Bidder's List\n",
      "FEC Federal Campaign Contributions - 2016\n",
      "PhilGEPS - Awards\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: phrase-mode search with phrase_distance=0, keeping the first three results\n",
    "search = {'query': 'enigma technologies', 'mode': 'phrase', 'phrase_distance': 0}\n",
    "datasets = public.datasets.list(**search)[0:3]\n",
    "for match in datasets:\n",
    "    print(match.display_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PhilGEPS - Bidder's List\n",
      "FEC Federal Campaign Contributions - 2016\n",
      "PhilGEPS - Awards\n"
     ]
    }
   ],
   "source": [
    "# Using the API: the same phrase search as query parameters; the Range header selects resources 0-2\n",
    "params = dict(query='enigma technologies', mode='phrase', phrase_distance=0)\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/datasets/',\n",
    "    params=params,\n",
    "    headers={'Range': 'resources=0-2'},\n",
    ")\n",
    "datasets = resp.json()\n",
    "for match in datasets:\n",
    "    print(match['display_name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get all datasets within the specified collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New York City Restaurant Inspections\n",
      "d8c29d0d-f283-4eb5-b4d4-460c9779d05d\n",
      "Restaurant Inspections - Florida\n",
      "bedaf052-5fcd-4758-8d27-048ce8746c6a\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: print the name and ID of each dataset whose parent is the given collection\n",
    "collection_ids = ['206b675d-e387-4019-a7a3-b837956d8f4f']\n",
    "datasets = public.datasets.list(parent_collection_id=collection_ids)\n",
    "for ds in datasets:\n",
    "    print(ds.display_name, ds.id, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New York City Restaurant Inspections\n",
      "d8c29d0d-f283-4eb5-b4d4-460c9779d05d\n"
     ]
    }
   ],
   "source": [
    "# Using the API: the parent collection ID is sent as a query parameter\n",
    "# NOTE(review): this request carries no auth header, unlike the authenticated SDK client --\n",
    "# presumably why it prints one dataset fewer than the SDK cell; confirm\n",
    "params = dict(parent_collection_id='206b675d-e387-4019-a7a3-b837956d8f4f')\n",
    "resp = requests.get('https://public.enigma.com/api/datasets/', params=params)\n",
    "datasets = resp.json()\n",
    "for ds in datasets:\n",
    "    print(ds['display_name'], ds['id'], sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /datasets/id\n",
    "#### Get the ID and first row of the current snapshot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c4e2c92a-aa04-4f39-a805-0f144b9a5e6e\n",
      "['30075445', 'Morris Park Bake Shop', 'Bronx', '1007 Morris Park Ave', '10462', 'Morris Park', '7188924968', 'Bakery', 'Bakery', '2018-05-11T00:00:00', '08C 10F', 'A', '5.0', '40.848459999999996', '-73.85624', {'lat': 40.84846, 'lng': -73.85624}]\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: fetch the dataset with row_limit=10, then show the current snapshot's ID\n",
    "# and the first of its returned rows\n",
    "dataset = public.datasets.get('d8c29d0d-f283-4eb5-b4d4-460c9779d05d', row_limit=10)\n",
    "current = dataset.current_snapshot\n",
    "print(current.id)\n",
    "print(current.table_rows.rows[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c4e2c92a-aa04-4f39-a805-0f144b9a5e6e\n",
      "['30075445', 'Morris Park Bake Shop', 'Bronx', '1007 Morris Park Ave', '10462', 'Morris Park', '7188924968', 'Bakery', 'Bakery', '2018-05-11T00:00:00', '08C 10F', 'A', '5.0', '40.848459999999996', '-73.85624', {'lat': 40.84846, 'lng': -73.85624}]\n"
     ]
    }
   ],
   "source": [
    "# Using the API: request the dataset with row_limit=10 as a query parameter, then read the\n",
    "# snapshot ID and first row out of the JSON body\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d',\n",
    "    params={'row_limit': 10},\n",
    ")\n",
    "dataset = resp.json()\n",
    "current = dataset['current_snapshot']\n",
    "print(current['id'])\n",
    "print(current['table_rows']['rows'][0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Export the current snapshot as a dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>camis</th>\n",
       "      <th>dba</th>\n",
       "      <th>boro</th>\n",
       "      <th>address</th>\n",
       "      <th>zipcode</th>\n",
       "      <th>neighborhood</th>\n",
       "      <th>phone</th>\n",
       "      <th>cuisine_description</th>\n",
       "      <th>food_type</th>\n",
       "      <th>inspection_date</th>\n",
       "      <th>violation_code</th>\n",
       "      <th>grade</th>\n",
       "      <th>score</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>geo_location</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30075445</td>\n",
       "      <td>Morris Park Bake Shop</td>\n",
       "      <td>Bronx</td>\n",
       "      <td>1007 Morris Park Ave</td>\n",
       "      <td>10462</td>\n",
       "      <td>Morris Park</td>\n",
       "      <td>7188924968</td>\n",
       "      <td>Bakery</td>\n",
       "      <td>Bakery</td>\n",
       "      <td>2018-05-11T00:00:00</td>\n",
       "      <td>08C 10F</td>\n",
       "      <td>A</td>\n",
       "      <td>5.0</td>\n",
       "      <td>40.84846</td>\n",
       "      <td>-73.85624</td>\n",
       "      <td>40.84846,-73.85624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>30112340</td>\n",
       "      <td>Wendy's</td>\n",
       "      <td>Brooklyn</td>\n",
       "      <td>469 Flatbush Ave</td>\n",
       "      <td>11225</td>\n",
       "      <td>Prospect Park</td>\n",
       "      <td>7182875005</td>\n",
       "      <td>Hamburgers</td>\n",
       "      <td>Hamburgers</td>\n",
       "      <td>2018-03-13T00:00:00</td>\n",
       "      <td>04L 08A 10B</td>\n",
       "      <td>A</td>\n",
       "      <td>12.0</td>\n",
       "      <td>40.66313</td>\n",
       "      <td>-73.96232</td>\n",
       "      <td>40.66313,-73.96232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>30191841</td>\n",
       "      <td>Dj Reynolds Pub and Restaurant</td>\n",
       "      <td>Manhattan</td>\n",
       "      <td>351 W 57th St</td>\n",
       "      <td>10019</td>\n",
       "      <td>Hell's Kitchen</td>\n",
       "      <td>2122452912</td>\n",
       "      <td>Irish</td>\n",
       "      <td>Irish</td>\n",
       "      <td>2018-05-16T00:00:00</td>\n",
       "      <td>04L 08A 10F</td>\n",
       "      <td>A</td>\n",
       "      <td>12.0</td>\n",
       "      <td>40.76782</td>\n",
       "      <td>-73.98481</td>\n",
       "      <td>40.76782,-73.98481</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      camis                             dba       boro               address  \\\n",
       "0  30075445           Morris Park Bake Shop      Bronx  1007 Morris Park Ave   \n",
       "1  30112340                         Wendy's   Brooklyn      469 Flatbush Ave   \n",
       "2  30191841  Dj Reynolds Pub and Restaurant  Manhattan         351 W 57th St   \n",
       "\n",
       "   zipcode    neighborhood       phone cuisine_description   food_type  \\\n",
       "0    10462     Morris Park  7188924968              Bakery      Bakery   \n",
       "1    11225   Prospect Park  7182875005          Hamburgers  Hamburgers   \n",
       "2    10019  Hell's Kitchen  2122452912               Irish       Irish   \n",
       "\n",
       "       inspection_date violation_code grade  score  latitude  longitude  \\\n",
       "0  2018-05-11T00:00:00        08C 10F     A    5.0  40.84846  -73.85624   \n",
       "1  2018-03-13T00:00:00    04L 08A 10B     A   12.0  40.66313  -73.96232   \n",
       "2  2018-05-16T00:00:00    04L 08A 10F     A   12.0  40.76782  -73.98481   \n",
       "\n",
       "         geo_location  \n",
       "0  40.84846,-73.85624  \n",
       "1  40.66313,-73.96232  \n",
       "2  40.76782,-73.98481  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the SDK: export the dataset's current snapshot with export_dataframe() and preview three rows\n",
    "dataset = public.datasets.get('d8c29d0d-f283-4eb5-b4d4-460c9779d05d')\n",
    "current = dataset.current_snapshot\n",
    "df = current.export_dataframe()\n",
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>camis</th>\n",
       "      <th>dba</th>\n",
       "      <th>boro</th>\n",
       "      <th>address</th>\n",
       "      <th>zipcode</th>\n",
       "      <th>neighborhood</th>\n",
       "      <th>phone</th>\n",
       "      <th>cuisine_description</th>\n",
       "      <th>food_type</th>\n",
       "      <th>inspection_date</th>\n",
       "      <th>violation_code</th>\n",
       "      <th>grade</th>\n",
       "      <th>score</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>geo_location</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30075445</td>\n",
       "      <td>Morris Park Bake Shop</td>\n",
       "      <td>Bronx</td>\n",
       "      <td>1007 Morris Park Ave</td>\n",
       "      <td>10462</td>\n",
       "      <td>Morris Park</td>\n",
       "      <td>7188924968</td>\n",
       "      <td>Bakery</td>\n",
       "      <td>Bakery</td>\n",
       "      <td>2018-05-11T00:00:00</td>\n",
       "      <td>08C 10F</td>\n",
       "      <td>A</td>\n",
       "      <td>5.0</td>\n",
       "      <td>40.84846</td>\n",
       "      <td>-73.85624</td>\n",
       "      <td>40.84846,-73.85624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>30112340</td>\n",
       "      <td>Wendy's</td>\n",
       "      <td>Brooklyn</td>\n",
       "      <td>469 Flatbush Ave</td>\n",
       "      <td>11225</td>\n",
       "      <td>Prospect Park</td>\n",
       "      <td>7182875005</td>\n",
       "      <td>Hamburgers</td>\n",
       "      <td>Hamburgers</td>\n",
       "      <td>2018-03-13T00:00:00</td>\n",
       "      <td>04L 08A 10B</td>\n",
       "      <td>A</td>\n",
       "      <td>12.0</td>\n",
       "      <td>40.66313</td>\n",
       "      <td>-73.96232</td>\n",
       "      <td>40.66313,-73.96232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>30191841</td>\n",
       "      <td>Dj Reynolds Pub and Restaurant</td>\n",
       "      <td>Manhattan</td>\n",
       "      <td>351 W 57th St</td>\n",
       "      <td>10019</td>\n",
       "      <td>Hell's Kitchen</td>\n",
       "      <td>2122452912</td>\n",
       "      <td>Irish</td>\n",
       "      <td>Irish</td>\n",
       "      <td>2018-05-16T00:00:00</td>\n",
       "      <td>04L 08A 10F</td>\n",
       "      <td>A</td>\n",
       "      <td>12.0</td>\n",
       "      <td>40.76782</td>\n",
       "      <td>-73.98481</td>\n",
       "      <td>40.76782,-73.98481</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      camis                             dba       boro               address  \\\n",
       "0  30075445           Morris Park Bake Shop      Bronx  1007 Morris Park Ave   \n",
       "1  30112340                         Wendy's   Brooklyn      469 Flatbush Ave   \n",
       "2  30191841  Dj Reynolds Pub and Restaurant  Manhattan         351 W 57th St   \n",
       "\n",
       "   zipcode    neighborhood       phone cuisine_description   food_type  \\\n",
       "0    10462     Morris Park  7188924968              Bakery      Bakery   \n",
       "1    11225   Prospect Park  7182875005          Hamburgers  Hamburgers   \n",
       "2    10019  Hell's Kitchen  2122452912               Irish       Irish   \n",
       "\n",
       "       inspection_date violation_code grade  score  latitude  longitude  \\\n",
       "0  2018-05-11T00:00:00        08C 10F     A    5.0  40.84846  -73.85624   \n",
       "1  2018-03-13T00:00:00    04L 08A 10B     A   12.0  40.66313  -73.96232   \n",
       "2  2018-05-16T00:00:00    04L 08A 10F     A   12.0  40.76782  -73.98481   \n",
       "\n",
       "         geo_location  \n",
       "0  40.84846,-73.85624  \n",
       "1  40.66313,-73.96232  \n",
       "2  40.76782,-73.98481  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the API: look up the current snapshot ID, download its export with the auth\n",
    "# headers, and parse the body as CSV into a DataFrame\n",
    "dataset = requests.get('https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d').json()\n",
    "export_url = 'https://public.enigma.com/api/export/{}'.format(dataset['current_snapshot']['id'])\n",
    "export = requests.get(export_url, headers=headers)\n",
    "# Fail loudly on an HTTP error (e.g. a rejected API key) instead of parsing the error body as CSV\n",
    "export.raise_for_status()\n",
    "response = export.content\n",
    "df = pd.read_csv(io.StringIO(response.decode('utf-8')))\n",
    "df.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get all snapshot field names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['CAMIS ID No.', 'Business Name', 'Borough', 'Address', 'Zipcode', 'Neighborhood', 'Business Phone', 'Cuisine Description', 'Food Type', 'Inspection Date', 'Violation Code', 'Restaurant Grade', 'Inspection Score', 'Latitude', 'Longitude', 'Geo Location']\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: collect the display name of every field of the current snapshot\n",
    "dataset = public.datasets.get('d8c29d0d-f283-4eb5-b4d4-460c9779d05d', row_limit=3)\n",
    "fields = []\n",
    "for snapshot_field in dataset.current_snapshot.fields:\n",
    "    fields.append(snapshot_field.display_name)\n",
    "print(fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['CAMIS ID No.', 'Business Name', 'Borough', 'Address', 'Zipcode', 'Neighborhood', 'Business Phone', 'Cuisine Description', 'Food Type', 'Inspection Date', 'Violation Code', 'Restaurant Grade', 'Inspection Score', 'Latitude', 'Longitude', 'Geo Location']\n"
     ]
    }
   ],
   "source": [
    "# Using the API: read each field's display_name out of the dataset's current_snapshot JSON\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d',\n",
    "    params={'row_limit': 3},\n",
    ")\n",
    "dataset = resp.json()\n",
    "fields = []\n",
    "for snapshot_field in dataset['current_snapshot']['fields']:\n",
    "    fields.append(snapshot_field['display_name'])\n",
    "print(fields)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /datasets/{id}/snapshots\n",
    "#### Get the IDs of all snapshots created after July 1, 2018 for the specified dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: no SDK equivalent found for GET /datasets/{id}/snapshots -- confirm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['f21b2014-5332-493d-9725-dee65e6fafb2', 'c4e2c92a-aa04-4f39-a805-0f144b9a5e6e']\n"
     ]
    }
   ],
   "source": [
    "# Using the API: filter the dataset's snapshots on created_at, then collect their IDs\n",
    "params = dict(filter='created_at>2018-07-01')\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d/snapshots/',\n",
    "    params=params,\n",
    ")\n",
    "snapshots = resp.json()\n",
    "ids = [entry['id'] for entry in snapshots]\n",
    "print(ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /snapshots/id\n",
    "#### Get the row count for the specified snapshot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "24940\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: fetch the snapshot by ID and print its row_count attribute\n",
    "snapshot_id = 'c4e2c92a-aa04-4f39-a805-0f144b9a5e6e'\n",
    "snapshot = public.snapshots.get(snapshot_id)\n",
    "print(snapshot.row_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "24940\n"
     ]
    }
   ],
   "source": [
    "# Using the API: fetch the snapshot JSON and print the value under its 'row_count' key\n",
    "resp = requests.get('https://public.enigma.com/api/snapshots/c4e2c92a-aa04-4f39-a805-0f144b9a5e6e')\n",
    "snapshot = resp.json()\n",
    "print(snapshot['row_count'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get rows 11-13 of the specified snapshot when rows are ordered by the specified column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['50007884', '(library) Four & Twenty Blackbirds', 'Brooklyn', '10 Grand Army Plz', '11238', 'Prospect Heights', '3475743474', 'CafÃ©/Coffee/Tea', 'Coffee and Tea', '2018-04-17T00:00:00', '02C 09C 10F', 'A', '12.0', '40.673809999999996', '-73.96759', {'lat': 40.67381, 'lng': -73.96759}]\n",
      "['50059670', '+ 81 Gallery New York', 'Manhattan', '167 Elizabeth St', '10012', 'Nolita', '6469984386', 'CafÃ©/Coffee/Tea', 'Coffee and Tea', '2018-05-03T00:00:00', '10F', 'A', '2.0', '40.721', '-73.99508', {'lat': 40.721, 'lng': -73.99508}]\n",
      "['41322152', '1 2 3 Burger Shot Beer', 'Manhattan', '738 10th Ave', '10019', \"Hell's Kitchen\", '2123150123', 'American', 'American', '2017-12-08T00:00:00', '06C 09C 10B 10F', 'A', '12.0', '40.76477', '-73.99136', {'lat': 40.76477, 'lng': -73.99136}]\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: request 3 rows at offset 10 with rows sorted on the 'dba' column\n",
    "paging = {'row_limit': 3, 'row_offset': 10, 'row_sort': 'dba'}\n",
    "snapshot = public.snapshots.get('c4e2c92a-aa04-4f39-a805-0f144b9a5e6e', **paging)\n",
    "for row in snapshot.table_rows.rows:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['50007884', '(library) Four & Twenty Blackbirds', 'Brooklyn', '10 Grand Army Plz', '11238', 'Prospect Heights', '3475743474', 'CafÃ©/Coffee/Tea', 'Coffee and Tea', '2018-04-17T00:00:00', '02C 09C 10F', 'A', '12.0', '40.673809999999996', '-73.96759', {'lat': 40.67381, 'lng': -73.96759}]\n",
      "['50059670', '+ 81 Gallery New York', 'Manhattan', '167 Elizabeth St', '10012', 'Nolita', '6469984386', 'CafÃ©/Coffee/Tea', 'Coffee and Tea', '2018-05-03T00:00:00', '10F', 'A', '2.0', '40.721', '-73.99508', {'lat': 40.721, 'lng': -73.99508}]\n",
      "['41322152', '1 2 3 Burger Shot Beer', 'Manhattan', '738 10th Ave', '10019', \"Hell's Kitchen\", '2123150123', 'American', 'American', '2017-12-08T00:00:00', '06C 09C 10B 10F', 'A', '12.0', '40.76477', '-73.99136', {'lat': 40.76477, 'lng': -73.99136}]\n"
     ]
    }
   ],
   "source": [
    "# Using the API: the same limit, offset and sort column, sent as query parameters\n",
    "params = dict(row_limit=3, row_offset=10, row_sort='dba')\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/snapshots/c4e2c92a-aa04-4f39-a805-0f144b9a5e6e',\n",
    "    params=params,\n",
    ")\n",
    "snapshot = resp.json()\n",
    "for row in snapshot['table_rows']['rows']:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Count the number of rows where certain columns have specific values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "437"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the API: run an advanced-mode query and display the match count;\n",
    "# only the count is read, so a single row is requested\n",
    "params = dict(\n",
    "    query_mode='advanced',\n",
    "    query='(boro:(Brooklyn) && cuisine_description:(Pizza))',\n",
    "    row_limit=1,\n",
    ")\n",
    "resp = requests.get(\n",
    "    'https://public.enigma.com/api/snapshots/c4e2c92a-aa04-4f39-a805-0f144b9a5e6e',\n",
    "    params=params,\n",
    ")\n",
    "snapshot = resp.json()\n",
    "snapshot['table_rows']['count']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "437"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the SDK: the same advanced-mode query; the match count is read from table_rows.count\n",
    "advanced_query = '(boro:(Brooklyn) && cuisine_description:(Pizza))'\n",
    "options = {'row_limit': 1, 'query_mode': 'advanced', 'query': advanced_query}\n",
    "snapshot = public.snapshots.get('c4e2c92a-aa04-4f39-a805-0f144b9a5e6e', **options)\n",
    "snapshot.table_rows.count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GET /tags/\n",
    "#### Get all tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['agriculture', 'art', 'buildings', 'companies', 'compliance', 'demographics', 'energy', 'health', 'politics', 'transit', 'restaurant inspections', 'immigration']\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK: gather the name of every tag into a list and print it\n",
    "tags = public.tags.list()\n",
    "tag_names = []\n",
    "for current_tag in tags:\n",
    "    tag_names.append(current_tag.name)\n",
    "print(tag_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['agriculture', 'art', 'buildings', 'companies', 'compliance', 'demographics', 'energy', 'health', 'politics', 'transit', 'restaurant inspections', 'immigration']\n"
     ]
    }
   ],
   "source": [
    "# Using the API: read the 'name' key of every tag in the JSON response and print the list\n",
    "resp = requests.get('https://public.enigma.com/api/tags/')\n",
    "tags = resp.json()\n",
    "tag_names = []\n",
    "for current_tag in tags:\n",
    "    tag_names.append(current_tag['name'])\n",
    "print(tag_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get all collections with the specified tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Collection 'Cooper Hewitt, Smithsonian Design Museum'>, <Collection 'The Tate Collection'>, <Collection 'Museum of Modern Art (MoMA)'>]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the SDK: list the collections carrying one tag\n",
    "# The tag is looked up by name ('art') rather than by list position, so the cell\n",
    "# does not depend on the order in which tags are returned\n",
    "tags = public.tags.list()\n",
    "art_tag = next(tag for tag in tags if tag.name == 'art')\n",
    "art_tag.collections()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Not possible using GET /tags/ API endpoint\n",
    "# Instead, use GET /collections/?has_tag=<tag_name>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get all datasets with the specified tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Dataset 'Institute of Museum and Library Services - Museum Universe'>, <Dataset 'The Metropolitan Museum of Art - Objects'>, <Dataset 'UN Commodity Trade Statistics - Works of Art'>, <Dataset 'University of Pennsylvania Museum of Archaeology and Anthropology - Objects'>]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the SDK\n",
    "tags = public.tags.list()\n",
    "tags[1].datasets()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Not possible using GET /tags/ API endpoint\n",
    "# Instead, use GET /datasets/?has_tag=<tag_name>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### POST /collections/\n",
    "#### Create a new collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'SDK Test Collection'"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the SDK\n",
    "parent = {'id': '0de52fad-7e7b-40e1-83c2-3cefd6d7edfc'}\n",
    "collection = public.collections.create(parent_collection=parent, display_name='SDK Test Collection')\n",
    "collection.display_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'API Test Collection'"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the API\n",
    "payload = {'parent_collection': {'id': '0de52fad-7e7b-40e1-83c2-3cefd6d7edfc'}, 'display_name': 'API Test Collection'}\n",
    "collection = requests.post('https://public.enigma.com/api/collections/', json=payload, headers=headers).json()\n",
    "collection['display_name']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### POST /datasets/\n",
    "#### Create a new dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'SDK Test Dataset'"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the SDK\n",
    "parent = {'id': 'c4f70b83-14e9-47c7-b857-37387476ac14'}\n",
    "dataset = public.datasets.create(parent_collection=parent, display_name='SDK Test Dataset')\n",
    "dataset.display_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'API Test Dataset'"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the API\n",
    "payload = {'parent_collection': {'id': 'c3c4ffe6-8d59-47c3-aa29-a6d310f8742b'}, 'display_name': 'API Test Dataset'}\n",
    "dataset = requests.post('https://public.enigma.com/api/datasets/', json=payload, headers=headers).json()\n",
    "dataset['display_name']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### POST /datasets/id/snapshots/\n",
    "#### Post a snapshot to an existing dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INGESTING\n",
      "INGESTING\n",
      "INGESTING\n",
      "SUCCESS\n"
     ]
    }
   ],
   "source": [
    "# Using the SDK\n",
    "payload = {\n",
    "  \"fields\": [\n",
    "    {\n",
    "      \"data_type\": \"string\",\n",
    "      \"name\": \"field1\"\n",
    "    },\n",
    "    {\n",
    "      \"data_type\": \"string\",\n",
    "      \"name\": \"field2\"\n",
    "    },\n",
    "    {\n",
    "      \"data_type\": \"string\",\n",
    "      \"name\": \"field3\"\n",
    "    }\n",
    "  ],\n",
    "  \"data_url\": \"https://docs.enigma.com/public/downloads/snapshot.csv\"\n",
    "}\n",
    "dataset = public.datasets.get('4f5af1de-5ca9-4dd7-af8c-ccd0e292f46c')\n",
    "snapshot = dataset.create_snapshot(payload)\n",
    "print(snapshot.ingest_status.state)\n",
    "\n",
    "# Check status\n",
    "while (snapshot.refresh().ingest_status.state != 'SUCCESS'):\n",
    "    time.sleep(5)\n",
    "    print(snapshot.refresh().ingest_status.state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INGESTING\n",
      "SUCCESS\n"
     ]
    }
   ],
   "source": [
    "# Using the API\n",
    "payload = json.dumps({\n",
    "  \"fields\": [\n",
    "    {\n",
    "      \"data_type\": \"string\",\n",
    "      \"name\": \"field1\"\n",
    "    },\n",
    "    {\n",
    "      \"data_type\": \"string\",\n",
    "      \"name\": \"field2\"\n",
    "    },\n",
    "    {\n",
    "      \"data_type\": \"string\",\n",
    "      \"name\": \"field3\"\n",
    "    }\n",
    "  ],\n",
    "  \"data_url\": \"https://docs.enigma.com/public/downloads/snapshot.csv\"\n",
    "})\n",
    "files = {'snapshot': ('snapshot', payload, 'application/json')}\n",
    "url = 'https://public.enigma.com/api/datasets/4fd52f56-cbed-4632-8e7f-6598163b2aca/snapshots/'\n",
    "snapshot = requests.post(url, files=files, headers=headers).json()\n",
    "print(snapshot['ingest_status']['state'])\n",
    "\n",
    "# Check status\n",
    "while (snapshot['ingest_status']['state'] != 'SUCCESS'):\n",
    "    time.sleep(5)\n",
    "    snapshot = requests.get('https://public.enigma.com/api/snapshots/{}'.format(snapshot['id']), headers=headers).json()\n",
    "    print(snapshot['ingest_status']['state'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}