Skip to content

Instantly share code, notes, and snippets.

@phenders
Created August 24, 2018 19:51
Show Gist options
  • Save phenders/e784d5735624368bc86e41bcd2ab54de to your computer and use it in GitHub Desktop.
Save phenders/e784d5735624368bc86e41bcd2ab54de to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"580"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import requests\n",
"\n",
"session = requests.Session()\n",
"headers = {'authorization': 'Bearer ' + '<YOUR_API_KEY>'}\n",
"session.headers.update(headers)\n",
"\n",
"url = 'https://public.enigma.com/api/datasets/'\n",
"phrase = 'michael cohen'\n",
"params = {'query': phrase, \n",
" 'row_limit':1000, \n",
" 'match_metadata':'false', \n",
" 'include_serialids':'false'\n",
" }\n",
"response = session.head(url, headers=headers, params=params)\n",
"ds_count = int(response.headers.get('content-range').split(\"/\")[1])\n",
"ds_count"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"580"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import enigma\n",
"\n",
"public = enigma.Public()\n",
"public.set_auth(apikey='YOUR-API-KEY')\n",
"\n",
"phrase = 'michael cohen'\n",
"datasets = public.datasets.list(\n",
" query=phrase, \n",
" row_limit=1000, \n",
" match_metadata=False, \n",
" include_serialids=False\n",
").all()\n",
"len(datasets)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24432"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results = []\n",
"for i in range(0, ds_count, 10):\n",
"    headers['Range'] = 'resources={}-{}'.format(i, i + 9)\n",
"    session.headers.update(headers)\n",
"    response = session.get(url, headers=headers, params=params).json()\n",
"    for dataset in response:\n",
"        results.append([dataset['display_name']])\n",
"        results.append(dataset['current_snapshot']['table_rows']['fields'])\n",
"        rows = dataset['current_snapshot']['table_rows']['rows']\n",
"        for row in rows:\n",
"            row = [value[:100] if isinstance(value, str) else '' for value in row]\n",
"            results.append(row)\n",
"len(results)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24432"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results = []\n",
"for dataset in datasets:\n",
"    tableview = dataset.current_snapshot.table_rows\n",
"    results.append([dataset.display_name] + [''] * 10)\n",
"    results.append([field.display_name for field in tableview.fields])\n",
"    for row in tableview:\n",
"        row = [value[:100] if isinstance(value, str) else '' for value in row]\n",
"        results.append(row)\n",
"len(results)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import string\n",
"\n",
"table = str.maketrans('', '', string.punctuation)\n",
"\n",
"results = []\n",
"distance = 2\n",
"\n",
"for dataset in datasets:\n",
"    tableview = dataset.current_snapshot.table_rows\n",
"    first_match = True\n",
"    for row in tableview:\n",
"        row = [str(value) if isinstance(value, bool) else value for value in row ]\n",
"        match = False\n",
"        for j in range (0, len(row) - distance + 1):\n",
"            words = (' '.join(filter(None, row[j: j + distance]))).lower().translate(table)\n",
"            if all(s in words.split() for s in phrase.lower().split()):\n",
"                match = True\n",
"                break\n",
"        if match:\n",
"            if first_match:\n",
"                results.append([dataset.display_name] + [''] * 10)\n",
"                results.append([field.display_name for field in tableview.fields])\n",
"                first_match = False\n",
"            row = [value[:100] if isinstance(value, str) else '' for value in row ]\n",
"            results.append(row)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame(results)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment