Skip to content

Instantly share code, notes, and snippets.

@phenders
Last active July 11, 2018 13:54
Show Gist options
  • Save phenders/23cbdc312667c96b54efa567f74d6b63 to your computer and use it in GitHub Desktop.
Save phenders/23cbdc312667c96b54efa567f74d6b63 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import json\n",
"import os\n",
"import time\n",
"\n",
"import enigma\n",
"import pandas as pd\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Client object through which every SDK call in this notebook is made\n",
"public = enigma.Public()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read the API key from the ENIGMA_API_KEY environment variable so the real key never\n",
"# has to be saved inside the notebook; 'xxx' remains the placeholder fallback.\n",
"API_KEY = os.environ.get('ENIGMA_API_KEY', 'xxx')\n",
"public.set_auth(apikey=API_KEY)\n",
"# The same key is sent as a bearer token on the raw API requests that pass headers=headers\n",
"headers = {'authorization': 'Bearer {}'.format(API_KEY)}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /collections/\n",
"#### Get all top level collections"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ICANN\n",
"74a03066-c3ab-4887-b040-a2a009be7371\n",
"United States\n",
"41026df2-2db0-41b1-8d7f-5a6c8e34de62\n",
"Governments\n",
"651a30cd-c864-49ca-8d8b-9418029127db\n",
"Inbox\n",
"d774fad5-d5a9-48fa-ac82-70483352739f\n",
"Enigma Internal\n",
"e0fac462-cb1c-410d-8331-99c85b08d6d2\n",
"experiments\n",
"0de52fad-7e7b-40e1-83c2-3cefd6d7edfc\n",
"test\n",
"0ec0eef3-54a4-4a2b-bba1-c9386dbb056c\n",
"Curated Collections\n",
"52dfb31c-f22e-49fb-bc05-8f5d8a5e7cab\n",
"Companies\n",
"5f8faa60-e6c3-4dc0-8eea-ade8c81d1265\n",
"Organizations\n",
"bc5c2c88-687e-4da2-93c3-32237ece39f0\n",
"Universities\n",
"c396139e-d685-4311-a447-2dd7669d963a\n"
]
}
],
"source": [
"# SDK: list the top-level collections, printing each name followed by its ID\n",
"collections = public.collections.list()\n",
"for top_level in collections:\n",
"    print(top_level.display_name)\n",
"    print(top_level.id)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"United States\n",
"41026df2-2db0-41b1-8d7f-5a6c8e34de62\n",
"Governments\n",
"651a30cd-c864-49ca-8d8b-9418029127db\n",
"Curated Collections\n",
"52dfb31c-f22e-49fb-bc05-8f5d8a5e7cab\n",
"Companies\n",
"5f8faa60-e6c3-4dc0-8eea-ade8c81d1265\n",
"Organizations\n",
"bc5c2c88-687e-4da2-93c3-32237ece39f0\n",
"Universities\n",
"c396139e-d685-4311-a447-2dd7669d963a\n"
]
}
],
"source": [
"# API: the same listing through a plain GET; the decoded JSON is iterated entry by entry\n",
"response = requests.get('https://public.enigma.com/api/collections/')\n",
"collections = response.json()\n",
"for entry in collections:\n",
"    print(entry['display_name'])\n",
"    print(entry['id'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get all collections with the specified parent collection"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"U.S. Federal Government\n",
"U.S. State Governments\n"
]
}
],
"source": [
"# SDK: list the collections that have the given parent collection\n",
"parent_id = '41026df2-2db0-41b1-8d7f-5a6c8e34de62'\n",
"collections = public.collections.list(parent_collection_id=parent_id)\n",
"for child in collections:\n",
"    print(child.display_name)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"U.S. Federal Government\n",
"U.S. State Governments\n"
]
}
],
"source": [
"# API: pass the parent collection ID as a query-string parameter\n",
"params = dict(parent_collection_id='41026df2-2db0-41b1-8d7f-5a6c8e34de62')\n",
"response = requests.get('https://public.enigma.com/api/collections/', params=params)\n",
"collections = response.json()\n",
"for child in collections:\n",
"    print(child['display_name'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /collections/id\n",
"#### Get all child datasets for the specified collection"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Oil & Gas Wells - Alaska\n",
"Oil & Gas Wells - Arkansas\n",
"Oil & Gas Wells - California\n",
"Oil & Gas Wells - New Mexico\n",
"Oil & Gas Wells - North Dakota\n",
"Oil & Gas Wells - Oklahoma\n",
"Oil & Gas Wells - Pennsylvania\n",
"Oil & Gas Wells - U.S. Offshore\n",
"Oil & Gas Wells - Utah\n",
"Oil & Gas Wells - Wyoming\n"
]
}
],
"source": [
"# SDK: fetch one collection by ID, then print the names of its child datasets\n",
"collection = public.collections.get('de8af1e6-f4f0-4762-b5a3-d85d3c3f0967')\n",
"datasets = collection.child_datasets()\n",
"for child in datasets:\n",
"    print(child.display_name)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Not possible using GET /collections/id\n",
"# Instead, use GET /datasets/?parent_collection_id=<ID>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /datasets/\n",
"#### Get the first three Enigma Public datasets (by name)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100th Congree Amendments\n",
"100th Congress Basic Information\n",
"100th Congress Becoming Law\n"
]
}
],
"source": [
"# SDK: slice the dataset listing down to its first three entries\n",
"datasets = public.datasets.list()[0:3]\n",
"for item in datasets:\n",
"    print(item.display_name)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100th Congree Amendments\n",
"100th Congress Basic Information\n",
"100th Congress Becoming Law\n"
]
}
],
"source": [
"# API: a Range header asks for resources 0 through 2 of the dataset listing\n",
"range_header = {'Range': 'resources=0-2'}\n",
"datasets = requests.get('https://public.enigma.com/api/datasets/', headers=range_header).json()\n",
"for item in datasets:\n",
"    print(item['display_name'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get the first row of each of the first three datasets"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['hres427-100', 'h610-100', 'h', 'h', '610', '1']]\n",
"[['hres71-100', 'hres', '71', '100', '1987-02-04', '1']]\n",
"[['sjres245-100', '100', 'public', '287', '1']]\n"
]
}
],
"source": [
"# SDK: list with row_limit=1, then print the rows held by each dataset's current snapshot\n",
"datasets = public.datasets.list(row_limit=1)[0:3]\n",
"for item in datasets:\n",
"    first_rows = item.current_snapshot.table_rows.rows\n",
"    print(first_rows)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['hres427-100', 'h610-100', 'h', 'h', '610', '1']]\n",
"[['hres71-100', 'hres', '71', '100', '1987-02-04', '1']]\n",
"[['sjres245-100', '100', 'public', '287', '1']]\n"
]
}
],
"source": [
"# API: row_limit goes in the query string, and the Range header selects resources 0 through 2\n",
"range_header = {'Range': 'resources=0-2'}\n",
"datasets = requests.get('https://public.enigma.com/api/datasets/?row_limit=1', headers=range_header).json()\n",
"for item in datasets:\n",
"    first_rows = item['current_snapshot']['table_rows']['rows']\n",
"    print(first_rows)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get the first three datasets containing the specified phrase (words immediately adjacent)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PhilGEPS - Bidder's List\n",
"FEC Federal Campaign Contributions - 2016\n",
"PhilGEPS - Awards\n"
]
}
],
"source": [
"# SDK: phrase-mode search with phrase_distance=0, keeping the first three matches\n",
"search = dict(query='enigma technologies', mode='phrase', phrase_distance=0)\n",
"datasets = public.datasets.list(**search)[0:3]\n",
"for match in datasets:\n",
"    print(match.display_name)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PhilGEPS - Bidder's List\n",
"FEC Federal Campaign Contributions - 2016\n",
"PhilGEPS - Awards\n"
]
}
],
"source": [
"# API: the same phrase search expressed as query-string parameters\n",
"params = dict(query='enigma technologies', mode='phrase', phrase_distance=0)\n",
"range_header = {'Range': 'resources=0-2'}\n",
"datasets = requests.get('https://public.enigma.com/api/datasets/', params=params, headers=range_header).json()\n",
"for match in datasets:\n",
"    print(match['display_name'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get all datasets within the specified collection"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New York City Restaurant Inspections\n",
"d8c29d0d-f283-4eb5-b4d4-460c9779d05d\n",
"Restaurant Inspections - Florida\n",
"bedaf052-5fcd-4758-8d27-048ce8746c6a\n"
]
}
],
"source": [
"# SDK: list the datasets for the given parent collection (the ID is passed inside a list)\n",
"parent_ids = ['206b675d-e387-4019-a7a3-b837956d8f4f']\n",
"datasets = public.datasets.list(parent_collection_id=parent_ids)\n",
"for item in datasets:\n",
"    print(item.display_name)\n",
"    print(item.id)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New York City Restaurant Inspections\n",
"d8c29d0d-f283-4eb5-b4d4-460c9779d05d\n"
]
}
],
"source": [
"# API: filter the dataset listing by parent collection ID\n",
"params = dict(parent_collection_id='206b675d-e387-4019-a7a3-b837956d8f4f')\n",
"response = requests.get('https://public.enigma.com/api/datasets/', params=params)\n",
"datasets = response.json()\n",
"for item in datasets:\n",
"    print(item['display_name'])\n",
"    print(item['id'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /datasets/id\n",
"#### Get the ID and first row of the current snapshot"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c4e2c92a-aa04-4f39-a805-0f144b9a5e6e\n",
"['30075445', 'Morris Park Bake Shop', 'Bronx', '1007 Morris Park Ave', '10462', 'Morris Park', '7188924968', 'Bakery', 'Bakery', '2018-05-11T00:00:00', '08C 10F', 'A', '5.0', '40.848459999999996', '-73.85624', {'lat': 40.84846, 'lng': -73.85624}]\n"
]
}
],
"source": [
"# SDK: fetch the dataset, then read the ID and first row from its current snapshot\n",
"dataset = public.datasets.get('d8c29d0d-f283-4eb5-b4d4-460c9779d05d', row_limit=10)\n",
"current = dataset.current_snapshot\n",
"print(current.id)\n",
"print(current.table_rows.rows[0])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c4e2c92a-aa04-4f39-a805-0f144b9a5e6e\n",
"['30075445', 'Morris Park Bake Shop', 'Bronx', '1007 Morris Park Ave', '10462', 'Morris Park', '7188924968', 'Bakery', 'Bakery', '2018-05-11T00:00:00', '08C 10F', 'A', '5.0', '40.848459999999996', '-73.85624', {'lat': 40.84846, 'lng': -73.85624}]\n"
]
}
],
"source": [
"# API: the dataset JSON embeds the current snapshot, including its ID and table rows\n",
"dataset = requests.get('https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d?row_limit=10').json()\n",
"current = dataset['current_snapshot']\n",
"print(current['id'])\n",
"print(current['table_rows']['rows'][0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Export the current snapshot as a dataframe"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>camis</th>\n",
" <th>dba</th>\n",
" <th>boro</th>\n",
" <th>address</th>\n",
" <th>zipcode</th>\n",
" <th>neighborhood</th>\n",
" <th>phone</th>\n",
" <th>cuisine_description</th>\n",
" <th>food_type</th>\n",
" <th>inspection_date</th>\n",
" <th>violation_code</th>\n",
" <th>grade</th>\n",
" <th>score</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>geo_location</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30075445</td>\n",
" <td>Morris Park Bake Shop</td>\n",
" <td>Bronx</td>\n",
" <td>1007 Morris Park Ave</td>\n",
" <td>10462</td>\n",
" <td>Morris Park</td>\n",
" <td>7188924968</td>\n",
" <td>Bakery</td>\n",
" <td>Bakery</td>\n",
" <td>2018-05-11T00:00:00</td>\n",
" <td>08C 10F</td>\n",
" <td>A</td>\n",
" <td>5.0</td>\n",
" <td>40.84846</td>\n",
" <td>-73.85624</td>\n",
" <td>40.84846,-73.85624</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30112340</td>\n",
" <td>Wendy's</td>\n",
" <td>Brooklyn</td>\n",
" <td>469 Flatbush Ave</td>\n",
" <td>11225</td>\n",
" <td>Prospect Park</td>\n",
" <td>7182875005</td>\n",
" <td>Hamburgers</td>\n",
" <td>Hamburgers</td>\n",
" <td>2018-03-13T00:00:00</td>\n",
" <td>04L 08A 10B</td>\n",
" <td>A</td>\n",
" <td>12.0</td>\n",
" <td>40.66313</td>\n",
" <td>-73.96232</td>\n",
" <td>40.66313,-73.96232</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>30191841</td>\n",
" <td>Dj Reynolds Pub and Restaurant</td>\n",
" <td>Manhattan</td>\n",
" <td>351 W 57th St</td>\n",
" <td>10019</td>\n",
" <td>Hell's Kitchen</td>\n",
" <td>2122452912</td>\n",
" <td>Irish</td>\n",
" <td>Irish</td>\n",
" <td>2018-05-16T00:00:00</td>\n",
" <td>04L 08A 10F</td>\n",
" <td>A</td>\n",
" <td>12.0</td>\n",
" <td>40.76782</td>\n",
" <td>-73.98481</td>\n",
" <td>40.76782,-73.98481</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" camis dba boro address \\\n",
"0 30075445 Morris Park Bake Shop Bronx 1007 Morris Park Ave \n",
"1 30112340 Wendy's Brooklyn 469 Flatbush Ave \n",
"2 30191841 Dj Reynolds Pub and Restaurant Manhattan 351 W 57th St \n",
"\n",
" zipcode neighborhood phone cuisine_description food_type \\\n",
"0 10462 Morris Park 7188924968 Bakery Bakery \n",
"1 11225 Prospect Park 7182875005 Hamburgers Hamburgers \n",
"2 10019 Hell's Kitchen 2122452912 Irish Irish \n",
"\n",
" inspection_date violation_code grade score latitude longitude \\\n",
"0 2018-05-11T00:00:00 08C 10F A 5.0 40.84846 -73.85624 \n",
"1 2018-03-13T00:00:00 04L 08A 10B A 12.0 40.66313 -73.96232 \n",
"2 2018-05-16T00:00:00 04L 08A 10F A 12.0 40.76782 -73.98481 \n",
"\n",
" geo_location \n",
"0 40.84846,-73.85624 \n",
"1 40.66313,-73.96232 \n",
"2 40.76782,-73.98481 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SDK: export the dataset's current snapshot as a dataframe and preview its first three rows\n",
"dataset = public.datasets.get('d8c29d0d-f283-4eb5-b4d4-460c9779d05d')\n",
"current = dataset.current_snapshot\n",
"df = current.export_dataframe()\n",
"df.head(n=3)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>camis</th>\n",
" <th>dba</th>\n",
" <th>boro</th>\n",
" <th>address</th>\n",
" <th>zipcode</th>\n",
" <th>neighborhood</th>\n",
" <th>phone</th>\n",
" <th>cuisine_description</th>\n",
" <th>food_type</th>\n",
" <th>inspection_date</th>\n",
" <th>violation_code</th>\n",
" <th>grade</th>\n",
" <th>score</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>geo_location</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30075445</td>\n",
" <td>Morris Park Bake Shop</td>\n",
" <td>Bronx</td>\n",
" <td>1007 Morris Park Ave</td>\n",
" <td>10462</td>\n",
" <td>Morris Park</td>\n",
" <td>7188924968</td>\n",
" <td>Bakery</td>\n",
" <td>Bakery</td>\n",
" <td>2018-05-11T00:00:00</td>\n",
" <td>08C 10F</td>\n",
" <td>A</td>\n",
" <td>5.0</td>\n",
" <td>40.84846</td>\n",
" <td>-73.85624</td>\n",
" <td>40.84846,-73.85624</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30112340</td>\n",
" <td>Wendy's</td>\n",
" <td>Brooklyn</td>\n",
" <td>469 Flatbush Ave</td>\n",
" <td>11225</td>\n",
" <td>Prospect Park</td>\n",
" <td>7182875005</td>\n",
" <td>Hamburgers</td>\n",
" <td>Hamburgers</td>\n",
" <td>2018-03-13T00:00:00</td>\n",
" <td>04L 08A 10B</td>\n",
" <td>A</td>\n",
" <td>12.0</td>\n",
" <td>40.66313</td>\n",
" <td>-73.96232</td>\n",
" <td>40.66313,-73.96232</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>30191841</td>\n",
" <td>Dj Reynolds Pub and Restaurant</td>\n",
" <td>Manhattan</td>\n",
" <td>351 W 57th St</td>\n",
" <td>10019</td>\n",
" <td>Hell's Kitchen</td>\n",
" <td>2122452912</td>\n",
" <td>Irish</td>\n",
" <td>Irish</td>\n",
" <td>2018-05-16T00:00:00</td>\n",
" <td>04L 08A 10F</td>\n",
" <td>A</td>\n",
" <td>12.0</td>\n",
" <td>40.76782</td>\n",
" <td>-73.98481</td>\n",
" <td>40.76782,-73.98481</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" camis dba boro address \\\n",
"0 30075445 Morris Park Bake Shop Bronx 1007 Morris Park Ave \n",
"1 30112340 Wendy's Brooklyn 469 Flatbush Ave \n",
"2 30191841 Dj Reynolds Pub and Restaurant Manhattan 351 W 57th St \n",
"\n",
" zipcode neighborhood phone cuisine_description food_type \\\n",
"0 10462 Morris Park 7188924968 Bakery Bakery \n",
"1 11225 Prospect Park 7182875005 Hamburgers Hamburgers \n",
"2 10019 Hell's Kitchen 2122452912 Irish Irish \n",
"\n",
" inspection_date violation_code grade score latitude longitude \\\n",
"0 2018-05-11T00:00:00 08C 10F A 5.0 40.84846 -73.85624 \n",
"1 2018-03-13T00:00:00 04L 08A 10B A 12.0 40.66313 -73.96232 \n",
"2 2018-05-16T00:00:00 04L 08A 10F A 12.0 40.76782 -73.98481 \n",
"\n",
" geo_location \n",
"0 40.84846,-73.85624 \n",
"1 40.66313,-73.96232 \n",
"2 40.76782,-73.98481 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Using the API\n",
"# Look up the dataset to learn the ID of its current snapshot, then download that snapshot's export\n",
"dataset = requests.get('https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d').json()\n",
"export_url = 'https://public.enigma.com/api/export/{}'.format(dataset['current_snapshot']['id'])\n",
"export = requests.get(export_url, headers=headers)\n",
"# Stop on an HTTP error (for example a rejected API key) instead of parsing the error body as CSV\n",
"export.raise_for_status()\n",
"response = export.content\n",
"# The export is read as UTF-8 encoded CSV text\n",
"df = pd.read_csv(io.StringIO(response.decode('utf-8')))\n",
"df.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get all snapshot field names"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['CAMIS ID No.', 'Business Name', 'Borough', 'Address', 'Zipcode', 'Neighborhood', 'Business Phone', 'Cuisine Description', 'Food Type', 'Inspection Date', 'Violation Code', 'Restaurant Grade', 'Inspection Score', 'Latitude', 'Longitude', 'Geo Location']\n"
]
}
],
"source": [
"# SDK: collect the display name of every field in the current snapshot\n",
"dataset = public.datasets.get('d8c29d0d-f283-4eb5-b4d4-460c9779d05d', row_limit=3)\n",
"fields = []\n",
"for column in dataset.current_snapshot.fields:\n",
"    fields.append(column.display_name)\n",
"print(fields)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['CAMIS ID No.', 'Business Name', 'Borough', 'Address', 'Zipcode', 'Neighborhood', 'Business Phone', 'Cuisine Description', 'Food Type', 'Inspection Date', 'Violation Code', 'Restaurant Grade', 'Inspection Score', 'Latitude', 'Longitude', 'Geo Location']\n"
]
}
],
"source": [
"# API: the same field names, read from the dataset's JSON representation\n",
"dataset = requests.get('https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d?row_limit=3').json()\n",
"fields = []\n",
"for column in dataset['current_snapshot']['fields']:\n",
"    fields.append(column['display_name'])\n",
"print(fields)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /datasets/{id}/snapshots\n",
"#### Get the IDs of all snapshots created after July 1, 2018 for the specified dataset"
]
},
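{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# TODO(review): no SDK equivalent for this endpoint was identified; confirm against the SDK docs. Use the API call below."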
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['f21b2014-5332-493d-9725-dee65e6fafb2', 'c4e2c92a-aa04-4f39-a805-0f144b9a5e6e']\n"
]
}
],
"source": [
"# API: list the dataset's snapshots, filtered on their creation date, and print their IDs\n",
"params = dict(filter='created_at>2018-07-01')\n",
"snapshots = requests.get('https://public.enigma.com/api/datasets/d8c29d0d-f283-4eb5-b4d4-460c9779d05d/snapshots/', params=params).json()\n",
"ids = []\n",
"for snap in snapshots:\n",
"    ids.append(snap['id'])\n",
"print(ids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /snapshots/id\n",
"#### Get the row count for the specified snapshot"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24940\n"
]
}
],
"source": [
"# SDK: fetch the snapshot by ID and print its row count\n",
"snapshot = public.snapshots.get('c4e2c92a-aa04-4f39-a805-0f144b9a5e6e')\n",
"row_total = snapshot.row_count\n",
"print(row_total)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24940\n"
]
}
],
"source": [
"# API: the snapshot JSON carries the row count under the 'row_count' key\n",
"snapshot = requests.get('https://public.enigma.com/api/snapshots/c4e2c92a-aa04-4f39-a805-0f144b9a5e6e').json()\n",
"row_total = snapshot['row_count']\n",
"print(row_total)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get rows 11-13 of the specified snapshot when rows are ordered by the specified column"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['50007884', '(library) Four & Twenty Blackbirds', 'Brooklyn', '10 Grand Army Plz', '11238', 'Prospect Heights', '3475743474', 'Café/Coffee/Tea', 'Coffee and Tea', '2018-04-17T00:00:00', '02C 09C 10F', 'A', '12.0', '40.673809999999996', '-73.96759', {'lat': 40.67381, 'lng': -73.96759}]\n",
"['50059670', '+ 81 Gallery New York', 'Manhattan', '167 Elizabeth St', '10012', 'Nolita', '6469984386', 'Café/Coffee/Tea', 'Coffee and Tea', '2018-05-03T00:00:00', '10F', 'A', '2.0', '40.721', '-73.99508', {'lat': 40.721, 'lng': -73.99508}]\n",
"['41322152', '1 2 3 Burger Shot Beer', 'Manhattan', '738 10th Ave', '10019', \"Hell's Kitchen\", '2123150123', 'American', 'American', '2017-12-08T00:00:00', '06C 09C 10B 10F', 'A', '12.0', '40.76477', '-73.99136', {'lat': 40.76477, 'lng': -73.99136}]\n"
]
}
],
"source": [
"# SDK: sort on the 'dba' column and request 3 rows starting at offset 10 (rows 11-13)\n",
"paging = dict(row_limit=3, row_offset=10, row_sort='dba')\n",
"snapshot = public.snapshots.get('c4e2c92a-aa04-4f39-a805-0f144b9a5e6e', **paging)\n",
"for record in snapshot.table_rows.rows:\n",
"    print(record)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['50007884', '(library) Four & Twenty Blackbirds', 'Brooklyn', '10 Grand Army Plz', '11238', 'Prospect Heights', '3475743474', 'Café/Coffee/Tea', 'Coffee and Tea', '2018-04-17T00:00:00', '02C 09C 10F', 'A', '12.0', '40.673809999999996', '-73.96759', {'lat': 40.67381, 'lng': -73.96759}]\n",
"['50059670', '+ 81 Gallery New York', 'Manhattan', '167 Elizabeth St', '10012', 'Nolita', '6469984386', 'Café/Coffee/Tea', 'Coffee and Tea', '2018-05-03T00:00:00', '10F', 'A', '2.0', '40.721', '-73.99508', {'lat': 40.721, 'lng': -73.99508}]\n",
"['41322152', '1 2 3 Burger Shot Beer', 'Manhattan', '738 10th Ave', '10019', \"Hell's Kitchen\", '2123150123', 'American', 'American', '2017-12-08T00:00:00', '06C 09C 10B 10F', 'A', '12.0', '40.76477', '-73.99136', {'lat': 40.76477, 'lng': -73.99136}]\n"
]
}
],
"source": [
"# API: the same sort, offset and limit options sent as query-string parameters\n",
"params = dict(row_limit=3, row_offset=10, row_sort='dba')\n",
"snapshot = requests.get('https://public.enigma.com/api/snapshots/c4e2c92a-aa04-4f39-a805-0f144b9a5e6e', params=params).json()\n",
"for record in snapshot['table_rows']['rows']:\n",
"    print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Count the number of rows where certain columns have specific values"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"437"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# API: advanced query mode with per-column terms; the count is read from table_rows\n",
"advanced_query = '(boro:(Brooklyn) && cuisine_description:(Pizza))'\n",
"params = dict(query_mode='advanced', query=advanced_query, row_limit=1)\n",
"snapshot = requests.get('https://public.enigma.com/api/snapshots/c4e2c92a-aa04-4f39-a805-0f144b9a5e6e', params=params).json()\n",
"snapshot['table_rows']['count']"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"437"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SDK: the same advanced query, passed as keyword arguments; only the count is displayed\n",
"advanced_query = '(boro:(Brooklyn) && cuisine_description:(Pizza))'\n",
"options = dict(row_limit=1, query_mode='advanced', query=advanced_query)\n",
"snapshot = public.snapshots.get('c4e2c92a-aa04-4f39-a805-0f144b9a5e6e', **options)\n",
"snapshot.table_rows.count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GET /tags/\n",
"#### Get all tags"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['agriculture', 'art', 'buildings', 'companies', 'compliance', 'demographics', 'energy', 'health', 'politics', 'transit', 'restaurant inspections', 'immigration']\n"
]
}
],
"source": [
"# SDK: list every tag and print the tag names\n",
"tags = public.tags.list()\n",
"tag_names = []\n",
"for tag in tags:\n",
"    tag_names.append(tag.name)\n",
"print(tag_names)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['agriculture', 'art', 'buildings', 'companies', 'compliance', 'demographics', 'energy', 'health', 'politics', 'transit', 'restaurant inspections', 'immigration']\n"
]
}
],
"source": [
"# API: read the 'name' key of every entry returned by the tags endpoint\n",
"tags = requests.get('https://public.enigma.com/api/tags/').json()\n",
"tag_names = []\n",
"for tag in tags:\n",
"    tag_names.append(tag['name'])\n",
"print(tag_names)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get all collections with the specified tag"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Collection 'Cooper Hewitt, Smithsonian Design Museum'>, <Collection 'The Tate Collection'>, <Collection 'Museum of Modern Art (MoMA)'>]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Using the SDK\n",
"# Select the tag by name rather than by list position, so the result does not\n",
"# depend on the order in which the tags happen to be returned.\n",
"tags = public.tags.list()\n",
"art_tag = next(tag for tag in tags if tag.name == 'art')\n",
"art_tag.collections()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Not possible using GET /tags/ API endpoint\n",
"# Instead, use GET /collections/?has_tag=<tag_name>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get all datasets with the specified tag"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Dataset 'Institute of Museum and Library Services - Museum Universe'>, <Dataset 'The Metropolitan Museum of Art - Objects'>, <Dataset 'UN Commodity Trade Statistics - Works of Art'>, <Dataset 'University of Pennsylvania Museum of Archaeology and Anthropology - Objects'>]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Using the SDK\n",
"# Select the tag by name rather than by list position, so the result does not\n",
"# depend on the order in which the tags happen to be returned.\n",
"tags = public.tags.list()\n",
"art_tag = next(tag for tag in tags if tag.name == 'art')\n",
"art_tag.datasets()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# Not possible using GET /tags/ API endpoint\n",
"# Instead, use GET /datasets/?has_tag=<tag_name>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### POST /collections/\n",
"#### Create a new collection"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'SDK Test Collection'"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SDK: create a collection under an existing parent collection and show its display name\n",
"parent = dict(id='0de52fad-7e7b-40e1-83c2-3cefd6d7edfc')\n",
"collection = public.collections.create(parent_collection=parent, display_name='SDK Test Collection')\n",
"collection.display_name"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'API Test Collection'"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# API: POST the new collection as a JSON body, sent with the authorization header\n",
"parent_ref = dict(id='0de52fad-7e7b-40e1-83c2-3cefd6d7edfc')\n",
"payload = dict(parent_collection=parent_ref, display_name='API Test Collection')\n",
"response = requests.post('https://public.enigma.com/api/collections/', json=payload, headers=headers)\n",
"collection = response.json()\n",
"collection['display_name']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### POST /datasets/\n",
"#### Create a new dataset"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'SDK Test Dataset'"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SDK: create a dataset inside an existing collection and show its display name\n",
"parent = dict(id='c4f70b83-14e9-47c7-b857-37387476ac14')\n",
"dataset = public.datasets.create(parent_collection=parent, display_name='SDK Test Dataset')\n",
"dataset.display_name"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'API Test Dataset'"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# API: POST the new dataset as a JSON body, sent with the authorization header\n",
"parent_ref = dict(id='c3c4ffe6-8d59-47c3-aa29-a6d310f8742b')\n",
"payload = dict(parent_collection=parent_ref, display_name='API Test Dataset')\n",
"response = requests.post('https://public.enigma.com/api/datasets/', json=payload, headers=headers)\n",
"dataset = response.json()\n",
"dataset['display_name']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### POST /datasets/id/snapshots/\n",
"#### Post a snapshot to an existing dataset"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INGESTING\n",
"INGESTING\n",
"INGESTING\n",
"SUCCESS\n"
]
}
],
"source": [
"# Using the SDK\n",
"# Snapshot definition: three string fields plus the URL of the data file\n",
"payload = {\n",
" \"fields\": [\n",
" {\n",
" \"data_type\": \"string\",\n",
" \"name\": \"field1\"\n",
" },\n",
" {\n",
" \"data_type\": \"string\",\n",
" \"name\": \"field2\"\n",
" },\n",
" {\n",
" \"data_type\": \"string\",\n",
" \"name\": \"field3\"\n",
" }\n",
" ],\n",
" \"data_url\": \"https://docs.enigma.com/public/downloads/snapshot.csv\"\n",
"}\n",
"dataset = public.datasets.get('4f5af1de-5ca9-4dd7-af8c-ccd0e292f46c')\n",
"snapshot = dataset.create_snapshot(payload)\n",
"state = snapshot.ingest_status.state\n",
"print(state)\n",
"\n",
"# Check status. The number of polls is capped so that an ingest which never reaches\n",
"# SUCCESS (for example because it failed) cannot hang the notebook forever.\n",
"max_polls = 60  # 60 polls x 5 seconds = at most 5 minutes of waiting\n",
"polls = 0\n",
"while state != 'SUCCESS':\n",
"    if polls == max_polls:\n",
"        raise RuntimeError('Snapshot ingest did not reach SUCCESS; last state: {}'.format(state))\n",
"    time.sleep(5)\n",
"    polls += 1\n",
"    # Refresh once per iteration so the state that is tested is the state that is printed\n",
"    state = snapshot.refresh().ingest_status.state\n",
"    print(state)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INGESTING\n",
"SUCCESS\n"
]
}
],
"source": [
"# Using the API\n",
"# Snapshot definition: three string fields plus the URL of the data file, serialized to JSON\n",
"payload = json.dumps({\n",
" \"fields\": [\n",
" {\n",
" \"data_type\": \"string\",\n",
" \"name\": \"field1\"\n",
" },\n",
" {\n",
" \"data_type\": \"string\",\n",
" \"name\": \"field2\"\n",
" },\n",
" {\n",
" \"data_type\": \"string\",\n",
" \"name\": \"field3\"\n",
" }\n",
" ],\n",
" \"data_url\": \"https://docs.enigma.com/public/downloads/snapshot.csv\"\n",
"})\n",
"# The definition is sent as a multipart form part named 'snapshot' with a JSON content type\n",
"files = {'snapshot': ('snapshot', payload, 'application/json')}\n",
"url = 'https://public.enigma.com/api/datasets/4fd52f56-cbed-4632-8e7f-6598163b2aca/snapshots/'\n",
"snapshot = requests.post(url, files=files, headers=headers).json()\n",
"state = snapshot['ingest_status']['state']\n",
"print(state)\n",
"\n",
"# Check status. The number of polls is capped so that an ingest which never reaches\n",
"# SUCCESS (for example because it failed) cannot hang the notebook forever.\n",
"status_url = 'https://public.enigma.com/api/snapshots/{}'.format(snapshot['id'])\n",
"max_polls = 60  # 60 polls x 5 seconds = at most 5 minutes of waiting\n",
"polls = 0\n",
"while state != 'SUCCESS':\n",
"    if polls == max_polls:\n",
"        raise RuntimeError('Snapshot ingest did not reach SUCCESS; last state: {}'.format(state))\n",
"    time.sleep(5)\n",
"    polls += 1\n",
"    snapshot = requests.get(status_url, headers=headers).json()\n",
"    state = snapshot['ingest_status']['state']\n",
"    print(state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment