phenders/sdk_blog_post.ipynb

## sdk_blog_post.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "580"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import requests\n",
    "\n",
    "# Put the bearer token on the session so requests made through it carry it.\n",
    "session = requests.Session()\n",
    "headers = {'authorization': 'Bearer ' + '<YOUR_API_KEY>'}\n",
    "session.headers.update(headers)\n",
    "\n",
    "url = 'https://public.enigma.com/api/datasets/'\n",
    "phrase = 'michael cohen'\n",
    "params = {\n",
    "    'query': phrase,\n",
    "    'row_limit': 1000,\n",
    "    'match_metadata': 'false',\n",
    "    'include_serialids': 'false',\n",
    "}\n",
    "# Only the response headers are needed here, so issue a HEAD request.\n",
    "response = session.head(url, headers=headers, params=params)\n",
    "# Raise on an HTTP error status instead of failing later on a missing header.\n",
    "response.raise_for_status()\n",
    "content_range = response.headers.get('content-range')\n",
    "if content_range is None:\n",
    "    raise RuntimeError('content-range header missing; cannot determine dataset count')\n",
    "# The count is the integer that follows the slash in content-range.\n",
    "ds_count = int(content_range.split('/')[1])\n",
    "ds_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "580"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import enigma\n",
    "\n",
    "# Create the SDK client and register the API key on it.\n",
    "public = enigma.Public()\n",
    "public.set_auth(apikey='YOUR-API-KEY')\n",
    "\n",
    "phrase = 'michael cohen'\n",
    "# Build the listing first, then call .all() on it to obtain the datasets.\n",
    "listing = public.datasets.list(\n",
    "    query=phrase,\n",
    "    row_limit=1000,\n",
    "    match_metadata=False,\n",
    "    include_serialids=False,\n",
    ")\n",
    "datasets = listing.all()\n",
    "len(datasets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24432"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = []\n",
    "page_size = 10  # datasets requested per call\n",
    "for i in range(0, ds_count, page_size):\n",
    "    # Send Range on this request only: writing it into `headers` or the session\n",
    "    # would leave the last page's range attached to every later request.\n",
    "    page_headers = {'Range': 'resources={}-{}'.format(i, i + page_size - 1)}\n",
    "    page = session.get(url, headers=page_headers, params=params)\n",
    "    # Stop on an HTTP error status rather than trying to parse an error body.\n",
    "    page.raise_for_status()\n",
    "    for dataset in page.json():\n",
    "        # Per dataset: a title row, the fields entry, then one row per record.\n",
    "        results.append([dataset['display_name']])\n",
    "        results.append(dataset['current_snapshot']['table_rows']['fields'])\n",
    "        rows = dataset['current_snapshot']['table_rows']['rows']\n",
    "        for row in rows:\n",
    "            # Keep strings (cut to 100 characters); any other value becomes ''.\n",
    "            row = [value[:100] if isinstance(value, str) else '' for value in row]\n",
    "            results.append(row)\n",
    "len(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24432"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = []\n",
    "for ds in datasets:\n",
    "    snapshot_rows = ds.current_snapshot.table_rows\n",
    "    # Title row: the dataset name followed by ten empty cells.\n",
    "    title_row = [ds.display_name]\n",
    "    title_row.extend([''] * 10)\n",
    "    results.append(title_row)\n",
    "    # Header row: the display name of each field.\n",
    "    results.append([fld.display_name for fld in snapshot_rows.fields])\n",
    "    # Data rows: strings are cut to 100 characters, any other value becomes ''.\n",
    "    results.extend(\n",
    "        [cell[:100] if isinstance(cell, str) else '' for cell in record]\n",
    "        for record in snapshot_rows\n",
    "    )\n",
    "len(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "\n",
    "table = str.maketrans('', '', string.punctuation)\n",
    "\n",
    "results = []\n",
    "distance = 2\n",
    "# The search terms do not change inside the loops, so split the phrase once.\n",
    "terms = phrase.lower().split()\n",
    "\n",
    "\n",
    "def row_matches(row, terms, distance):\n",
    "    \"\"\"Return True if a window of `distance` adjacent cells contains every term.\n",
    "\n",
    "    Each window's non-empty cells are joined with spaces, lower-cased and\n",
    "    stripped of punctuation before being compared word by word.\n",
    "    \"\"\"\n",
    "    # max(1, ...) still checks one window when the row is shorter than `distance`.\n",
    "    for j in range(max(1, len(row) - distance + 1)):\n",
    "        words = ' '.join(filter(None, row[j: j + distance])).lower().translate(table).split()\n",
    "        if all(term in words for term in terms):\n",
    "            return True\n",
    "    return False\n",
    "\n",
    "\n",
    "for dataset in datasets:\n",
    "    tableview = dataset.current_snapshot.table_rows\n",
    "    first_match = True\n",
    "    for row in tableview:\n",
    "        # Stringify non-text cells (bools, numbers) so they can be joined;\n",
    "        # None is kept so that filter(None, ...) still drops it.\n",
    "        row = [value if value is None or isinstance(value, str) else str(value) for value in row]\n",
    "        if not row_matches(row, terms, distance):\n",
    "            continue\n",
    "        if first_match:\n",
    "            # Emit the dataset title and field names once, before its first hit.\n",
    "            results.append([dataset.display_name] + [''] * 10)\n",
    "            results.append([field.display_name for field in tableview.fields])\n",
    "            first_match = False\n",
    "        results.append([value[:100] if isinstance(value, str) else '' for value in row])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Build the frame from the collected `results` lists, one row per entry.\n",
    "df = pd.DataFrame(data=results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"580"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import requests\n",
	"\n",
	"# Put the bearer token on the session so requests made through it carry it.\n",
	"session = requests.Session()\n",
	"headers = {'authorization': 'Bearer ' + '<YOUR_API_KEY>'}\n",
	"session.headers.update(headers)\n",
	"\n",
	"url = 'https://public.enigma.com/api/datasets/'\n",
	"phrase = 'michael cohen'\n",
	"params = {\n",
	"    'query': phrase,\n",
	"    'row_limit': 1000,\n",
	"    'match_metadata': 'false',\n",
	"    'include_serialids': 'false',\n",
	"}\n",
	"# Only the response headers are needed here, so issue a HEAD request.\n",
	"response = session.head(url, headers=headers, params=params)\n",
	"# Raise on an HTTP error status instead of failing later on a missing header.\n",
	"response.raise_for_status()\n",
	"content_range = response.headers.get('content-range')\n",
	"if content_range is None:\n",
	"    raise RuntimeError('content-range header missing; cannot determine dataset count')\n",
	"# The count is the integer that follows the slash in content-range.\n",
	"ds_count = int(content_range.split('/')[1])\n",
	"ds_count"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"580"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import enigma\n",
	"\n",
	"# Create the SDK client and register the API key on it.\n",
	"public = enigma.Public()\n",
	"public.set_auth(apikey='YOUR-API-KEY')\n",
	"\n",
	"phrase = 'michael cohen'\n",
	"# Build the listing first, then call .all() on it to obtain the datasets.\n",
	"listing = public.datasets.list(\n",
	"    query=phrase,\n",
	"    row_limit=1000,\n",
	"    match_metadata=False,\n",
	"    include_serialids=False,\n",
	")\n",
	"datasets = listing.all()\n",
	"len(datasets)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"24432"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results = []\n",
	"page_size = 10  # datasets requested per call\n",
	"for i in range(0, ds_count, page_size):\n",
	"    # Send Range on this request only: writing it into `headers` or the session\n",
	"    # would leave the last page's range attached to every later request.\n",
	"    page_headers = {'Range': 'resources={}-{}'.format(i, i + page_size - 1)}\n",
	"    page = session.get(url, headers=page_headers, params=params)\n",
	"    # Stop on an HTTP error status rather than trying to parse an error body.\n",
	"    page.raise_for_status()\n",
	"    for dataset in page.json():\n",
	"        # Per dataset: a title row, the fields entry, then one row per record.\n",
	"        results.append([dataset['display_name']])\n",
	"        results.append(dataset['current_snapshot']['table_rows']['fields'])\n",
	"        rows = dataset['current_snapshot']['table_rows']['rows']\n",
	"        for row in rows:\n",
	"            # Keep strings (cut to 100 characters); any other value becomes ''.\n",
	"            row = [value[:100] if isinstance(value, str) else '' for value in row]\n",
	"            results.append(row)\n",
	"len(results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"24432"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results = []\n",
	"for dataset in datasets:\n",
	"    tableview = dataset.current_snapshot.table_rows\n",
	"    # Title row: the dataset name followed by ten empty cells.\n",
	"    results.append([dataset.display_name] + [''] * 10)\n",
	"    # Header row: the display name of each field.\n",
	"    results.append([field.display_name for field in tableview.fields])\n",
	"    for row in tableview:\n",
	"        # Keep strings (cut to 100 characters); any other value becomes ''.\n",
	"        row = [value[:100] if isinstance(value, str) else '' for value in row]\n",
	"        results.append(row)\n",
	"len(results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"import string\n",
	"\n",
	"table = str.maketrans('', '', string.punctuation)\n",
	"\n",
	"results = []\n",
	"distance = 2\n",
	"# The search terms do not change inside the loops, so split the phrase once.\n",
	"terms = phrase.lower().split()\n",
	"\n",
	"\n",
	"def row_matches(row, terms, distance):\n",
	"    \"\"\"Return True if a window of `distance` adjacent cells contains every term.\n",
	"\n",
	"    Each window's non-empty cells are joined with spaces, lower-cased and\n",
	"    stripped of punctuation before being compared word by word.\n",
	"    \"\"\"\n",
	"    # max(1, ...) still checks one window when the row is shorter than `distance`.\n",
	"    for j in range(max(1, len(row) - distance + 1)):\n",
	"        words = ' '.join(filter(None, row[j: j + distance])).lower().translate(table).split()\n",
	"        if all(term in words for term in terms):\n",
	"            return True\n",
	"    return False\n",
	"\n",
	"\n",
	"for dataset in datasets:\n",
	"    tableview = dataset.current_snapshot.table_rows\n",
	"    first_match = True\n",
	"    for row in tableview:\n",
	"        # Stringify non-text cells (bools, numbers) so they can be joined;\n",
	"        # None is kept so that filter(None, ...) still drops it.\n",
	"        row = [value if value is None or isinstance(value, str) else str(value) for value in row]\n",
	"        if not row_matches(row, terms, distance):\n",
	"            continue\n",
	"        if first_match:\n",
	"            # Emit the dataset title and field names once, before its first hit.\n",
	"            results.append([dataset.display_name] + [''] * 10)\n",
	"            results.append([field.display_name for field in tableview.fields])\n",
	"            first_match = False\n",
	"        results.append([value[:100] if isinstance(value, str) else '' for value in row])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"\n",
	"# Build the frame from the collected `results` lists, one row per entry.\n",
	"df = pd.DataFrame(data=results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}