joshcarter/ElasticSearch.ipynb

## ElasticSearch.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ElasticSearch Tests\n",
    "\n",
    "- Assumes ES running locally\n",
    "\n",
    "- Uses MediaInfo JSON test data (see below)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "from datetime import datetime\n",
    "from elasticsearch import Elasticsearch\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "import json, random, string, uuid, datetime\n",
    "\n",
    "def letters(length=10):\n",
    "    \"\"\"Generate a random string of fixed length \"\"\"\n",
    "    chars = string.ascii_lowercase\n",
    "    return ''.join(random.choice(chars) for i in range(length))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load MediaInfo Test Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo1.json\n",
      "- title: Back to the Boat\n",
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo2.json\n",
      "- title: Behind the Scenes\n",
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo3.json\n",
      "- title: Captain's Tour: Inside the Boat\n",
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo4.json\n",
      "- title: Maria's Take\n",
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo5.json\n",
      "- title: The Battle of the Atlantic\n",
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo6.json\n",
      "- title: The Perfect Boat: The Director's Cut\n",
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoExamples/MediaInfo7.json\n",
      "- title: Das Boot (1981) Director's Cut\n"
     ]
    }
   ],
   "source": [
    "# Load test data\n",
    "es = Elasticsearch()\n",
    "idx = 'test'\n",
    "base_dir = Path('/Users/josh/Projects/jupyter_test/MediaInfoExamples')\n",
    "media_info = []\n",
    "general_info = []\n",
    "\n",
    "for i in range(7):\n",
    "    filename = f\"{base_dir}/MediaInfo{i+1}.json\"\n",
    "    print(f\"loading info from {filename}\")\n",
    "    with open(filename) as f:\n",
    "        mf = json.load(f)\n",
    "        gf = mf['media']['track'][0]\n",
    "        media_info.append(mf)\n",
    "        general_info.append(gf)\n",
    "        if 'Title' in gf.keys():\n",
    "            print(f\"- title: {gf['Title']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inserting Back to the Boat\n",
      "inserting Behind the Scenes\n",
      "inserting Captain's Tour: Inside the Boat\n",
      "inserting Maria's Take\n",
      "inserting The Battle of the Atlantic\n",
      "inserting The Perfect Boat: The Director's Cut\n",
      "inserting Das Boot (1981) Director's Cut\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Insert Test data into ES\n",
    "for i in range(len(general_info)):\n",
    "    gf = general_info[i]\n",
    "    print(f\"inserting {gf['Title']}\")\n",
    "    es.index(index=idx, id=i, body=gf)\n",
    "\n",
    "es.indices.refresh(index=idx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Simple Test Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def insert_test_data(index, n):\n",
    "    # make test data consistent from run to run\n",
    "    random.seed(0)\n",
    "\n",
    "    # insert new data\n",
    "    for i in range(n):\n",
    "        meta = {\n",
    "            \"Title\": f\"object-{i}\",\n",
    "            \"Type\": random.choice([\"mp4\", \"aac\", \"mkv\", \"txt\"]),\n",
    "            \"FileSize\": random.randint(1000, 1000000),\n",
    "        }\n",
    "\n",
    "        es.index(index=index, id=i, body=meta)\n",
    "\n",
    "idx2 = 'test'\n",
    "insert_test_data(idx2, 1000)\n",
    "es.indices.refresh(index=idx2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MediaInfo Query and Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Back to the Boat'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query by ID\n",
    "es.get(index=idx, id=0)['_source']['Title']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>title</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.849856</td>\n",
       "      <td>Back to the Boat</td>\n",
       "      <td>1138580029</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.773912</td>\n",
       "      <td>Captain's Tour: Inside the Boat</td>\n",
       "      <td>249778146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.710427</td>\n",
       "      <td>The Perfect Boat: The Director's Cut</td>\n",
       "      <td>369443217</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      score                                 title        size\n",
       "0  0.849856                      Back to the Boat  1138580029\n",
       "1  0.773912       Captain's Tour: Inside the Boat   249778146\n",
       "2  0.710427  The Perfect Boat: The Director's Cut   369443217"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search by title\n",
    "res = es.search(index=idx, body={\"query\": {\"match\": {\"Title\": \"Boat\"}}})\n",
    "dat = []\n",
    "for hit in res['hits']['hits']:\n",
    "    dat.append({\n",
    "        'score': hit['_score'],\n",
    "        'title': hit['_source']['Title'],\n",
    "        'size': hit['_source']['FileSize'],\n",
    "    })\n",
    "\n",
    "pd.DataFrame(dat, columns=['score', 'title', 'size'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple Test Data Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>title</th>\n",
       "      <th>type</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-2</td>\n",
       "      <td>mkv</td>\n",
       "      <td>537110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-4</td>\n",
       "      <td>mkv</td>\n",
       "      <td>500748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-5</td>\n",
       "      <td>mkv</td>\n",
       "      <td>612720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-10</td>\n",
       "      <td>mkv</td>\n",
       "      <td>954938</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-14</td>\n",
       "      <td>mkv</td>\n",
       "      <td>496077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-19</td>\n",
       "      <td>mkv</td>\n",
       "      <td>66304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-24</td>\n",
       "      <td>mkv</td>\n",
       "      <td>256759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-25</td>\n",
       "      <td>mkv</td>\n",
       "      <td>738822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-32</td>\n",
       "      <td>mkv</td>\n",
       "      <td>579045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1.464175</td>\n",
       "      <td>object-33</td>\n",
       "      <td>mkv</td>\n",
       "      <td>741883</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      score      title type    size\n",
       "0  1.464175   object-2  mkv  537110\n",
       "1  1.464175   object-4  mkv  500748\n",
       "2  1.464175   object-5  mkv  612720\n",
       "3  1.464175  object-10  mkv  954938\n",
       "4  1.464175  object-14  mkv  496077\n",
       "5  1.464175  object-19  mkv   66304\n",
       "6  1.464175  object-24  mkv  256759\n",
       "7  1.464175  object-25  mkv  738822\n",
       "8  1.464175  object-32  mkv  579045\n",
       "9  1.464175  object-33  mkv  741883"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search by type\n",
    "res = es.search(index=idx2, body={\"query\": {\"match\": {\"Type\": \"mkv\"}}})\n",
    "dat = []\n",
    "for hit in res['hits']['hits']:\n",
    "    dat.append({\n",
    "        'score': hit['_score'],\n",
    "        'title': hit['_source']['Title'],\n",
    "        'type': hit['_source']['Type'],\n",
    "        'size': hit['_source']['FileSize'],\n",
    "    })\n",
    "\n",
    "pd.DataFrame(dat, columns=['score', 'title', 'type', 'size'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7 entries\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>title</th>\n",
       "      <th>type</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-66</td>\n",
       "      <td>mp4</td>\n",
       "      <td>106494</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-146</td>\n",
       "      <td>aac</td>\n",
       "      <td>106047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-315</td>\n",
       "      <td>mp4</td>\n",
       "      <td>105030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-347</td>\n",
       "      <td>mp4</td>\n",
       "      <td>108563</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-801</td>\n",
       "      <td>mkv</td>\n",
       "      <td>101983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-814</td>\n",
       "      <td>mkv</td>\n",
       "      <td>104269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.0</td>\n",
       "      <td>object-855</td>\n",
       "      <td>mkv</td>\n",
       "      <td>102247</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   score       title type    size\n",
       "0    1.0   object-66  mp4  106494\n",
       "1    1.0  object-146  aac  106047\n",
       "2    1.0  object-315  mp4  105030\n",
       "3    1.0  object-347  mp4  108563\n",
       "4    1.0  object-801  mkv  101983\n",
       "5    1.0  object-814  mkv  104269\n",
       "6    1.0  object-855  mkv  102247"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search by length\n",
    "res = es.search(index=idx2, body={\"query\": {\"range\": {\"FileSize\": {\"gte\": 100000, \"lte\": 110000}}}})\n",
    "dat = []\n",
    "for hit in res['hits']['hits']:\n",
    "    dat.append({\n",
    "        'score': hit['_score'],\n",
    "        'title': hit['_source']['Title'],\n",
    "        'type': hit['_source']['Type'],\n",
    "        'size': hit['_source']['FileSize'],\n",
    "    })\n",
    "\n",
    "print(f'{len(dat)} entries')\n",
    "pd.DataFrame(dat, columns=['score', 'title', 'type', 'size'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'took': 3,\n",
       " 'timed_out': False,\n",
       " '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},\n",
       " 'hits': {'total': {'value': 3, 'relation': 'eq'},\n",
       "  'max_score': 1.0,\n",
       "  'hits': [{'_shard': '[media_info2][0]',\n",
       "    '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n",
       "    '_index': 'media_info2',\n",
       "    '_type': '_doc',\n",
       "    '_id': '801',\n",
       "    '_score': 1.0,\n",
       "    '_source': {'Title': 'object-801', 'Type': 'mkv', 'FileSize': 101983},\n",
       "    '_explanation': {'value': 1.0,\n",
       "     'description': 'FileSize:[100000 TO 105000]',\n",
       "     'details': []}},\n",
       "   {'_shard': '[media_info2][0]',\n",
       "    '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n",
       "    '_index': 'media_info2',\n",
       "    '_type': '_doc',\n",
       "    '_id': '814',\n",
       "    '_score': 1.0,\n",
       "    '_source': {'Title': 'object-814', 'Type': 'mkv', 'FileSize': 104269},\n",
       "    '_explanation': {'value': 1.0,\n",
       "     'description': 'FileSize:[100000 TO 105000]',\n",
       "     'details': []}},\n",
       "   {'_shard': '[media_info2][0]',\n",
       "    '_node': 'BHkqVJoZTWCqJgx3hwbBUg',\n",
       "    '_index': 'media_info2',\n",
       "    '_type': '_doc',\n",
       "    '_id': '855',\n",
       "    '_score': 1.0,\n",
       "    '_source': {'Title': 'object-855', 'Type': 'mkv', 'FileSize': 102247},\n",
       "    '_explanation': {'value': 1.0,\n",
       "     'description': 'FileSize:[100000 TO 105000]',\n",
       "     'details': []}}]}}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Explain search by length\n",
    "es.search(index=idx2, body={\"explain\": True, \"size\": 25, \"query\": {\"range\": {\"FileSize\": {\"gte\": 100000, \"lte\": 105000}}}})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Leftovers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternate display using tabulate package\n",
    "# from IPython.display import HTML, display\n",
    "# import tabulate\n",
    "# \n",
    "# res = es.search(index=idx, body={\"query\": {\"match\": {\"Title\": \"Boat\"}}})\n",
    "# dat = []\n",
    "# for hit in res['hits']['hits']:\n",
    "#    dat.append({\n",
    "#        'score': hit['_score'],\n",
    "#        'title': hit['_source']['Title'],\n",
    "#        'size': hit['_source']['FileSize'],\n",
    "#    })\n",
    "#\n",
    "# display(HTML(tabulate.tabulate(dat, tablefmt='html')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2019-01-25T00:00:00'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datetime.datetime(2019,1,random.randint(1,30)).isoformat()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'pvzljmteydmlqphuwlulnilmyywjdpjdoelhxfkphdvmmoqosthvmqjphkqvacpkmhnbsybncigxkfdfzwlahbamraedttgcxoga'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "letters(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading info from /Users/josh/Projects/jupyter_test/MediaInfoTemplates/mkv.json\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'{\\n    \"media\": {\\n        \"@ref\": \"/Users/josh/Downloads/2019-05-01/Das.Boot.1981.Directors.Cut.Bluray.1080p.DTS-HD.x264-Grym/Das.Boot.1981.Directors.Cut.Bluray.1080p.DTS-HD.x264-Grym@BTNET/Das.Boot.Extras-Grym@BTNET/Back.to.the.Boat.(Doc)-Grym@BTNET/Back.to.the.Boat.(Doc)-Grym@BTNET.mkv\",\\n        \"track\": [\\n            {\\n                \"@type\": \"General\",\\n                \"UniqueID\": \"{uuid.uuid4()}\",\\n                \"VideoCount\": \"1\",\\n                \"AudioCount\": \"1\",\\n                \"MenuCount\": \"1\",\\n                \"FileExtension\": \"mkv\",\\n                \"Format\": \"Matroska\",\\n                \"Format_Version\": \"2\",\\n                \"FileSize\": \"{random.randint(10000,1000000)}\",\\n                \"Duration\": \"2688.436\",\\n                \"OverallBitRate\": \"3388082\",\\n                \"FrameRate\": \"23.976\",\\n                \"FrameCount\": \"{random.randint(100,10000)}\",\\n                \"StreamSize\": \"22656697\",\\n                \"IsStreamable\": \"Yes\",\\n                \"Title\": \"{letters(100)}\",\\n                \"Movie\": \"{letters(50)}\",\\n                \"Encoded_Date\": \"{datetime.datetime(2019,1,random.randint(1,30)).isoformat()}\",\\n                \"File_Modified_Date\": \"UTC 2013-12-08 10:09:36\",\\n                \"File_Modified_Date_Local\": \"2013-12-08 03:09:36\",\\n                \"Encoded_Application\": \"DVDFab\",\\n                \"Encoded_Library\": \"libebml v0.7.8 + libmatroska v0.8.1\"\\n            },\\n            {\\n                \"@type\": \"Video\",\\n                \"StreamOrder\": \"0\",\\n                \"ID\": \"1\",\\n                \"UniqueID\": \"329804448\",\\n                \"Format\": \"AVC\",\\n                \"Format_Profile\": \"High\",\\n                \"Format_Level\": \"4.1\",\\n                \"Format_Settings_CABAC\": \"Yes\",\\n                \"Format_Settings_RefFrames\": \"2\",\\n                \"CodecID\": \"V_MPEG4/ISO/AVC\",\\n                \"Duration\": \"2688.438\",\\n                \"BitRate\": \"2872659\",\\n                \"Width\": \"1280\",\\n                \"Height\": \"720\",\\n                \"Sampled_Width\": \"1280\",\\n                \"Sampled_Height\": \"720\",\\n                \"PixelAspectRatio\": \"1.000\",\\n                \"DisplayAspectRatio\": \"1.778\",\\n                \"FrameRate_Mode\": \"CFR\",\\n                \"FrameRate\": \"23.976\",\\n                \"FrameCount\": \"64458\",\\n                \"ColorSpace\": \"YUV\",\\n                \"ChromaSubsampling\": \"4:2:0\",\\n                \"BitDepth\": \"8\",\\n                \"ScanType\": \"Progressive\",\\n                \"Delay\": \"0.000\",\\n                \"StreamSize\": \"965370916\",\\n                \"Encoded_Library\": \"x264 - core\",\\n                \"Encoded_Library_Name\": \"x264\",\\n                \"Encoded_Library_Version\": \"core\",\\n                \"Default\": \"Yes\",\\n                \"Forced\": \"No\",\\n                \"colour_description_present\": \"Yes\",\\n                \"colour_description_present_Source\": \"Stream\",\\n                \"colour_range\": \"Limited\",\\n                \"colour_range_Source\": \"Stream\",\\n                \"colour_primaries\": \"BT.709\",\\n                \"colour_primaries_Source\": \"Stream\",\\n                \"transfer_characteristics\": \"BT.709\",\\n                \"transfer_characteristics_Source\": \"Stream\",\\n                \"matrix_coefficients\": \"BT.709\",\\n                \"matrix_coefficients_Source\": \"Stream\"\\n            },\\n            {\\n                \"@type\": \"Audio\",\\n                \"StreamOrder\": \"1\",\\n                \"ID\": \"2\",\\n                \"UniqueID\": \"857171869\",\\n                \"Format\": \"AC-3\",\\n                \"Format_Commercial_IfAny\": \"Dolby Digital\",\\n                \"Format_Settings_Endianness\": \"Big\",\\n                \"CodecID\": \"A_AC3\",\\n                \"Duration\": \"2688.436\",\\n                \"BitRate_Mode\": \"CBR\",\\n                \"BitRate\": \"448000\",\\n                \"Channels\": \"6\",\\n                \"ChannelPositions\": \"Front: L C R, Side: L R, LFE\",\\n                \"ChannelLayout\": \"L R C LFE Ls Rs\",\\n                \"SamplesPerFrame\": \"1536\",\\n                \"SamplingRate\": \"48000\",\\n                \"SamplingCount\": \"129044928\",\\n                \"FrameRate\": \"31.250\",\\n                \"BitDepth\": \"16\",\\n                \"Compression_Mode\": \"Lossy\",\\n                \"Delay\": \"0.000\",\\n                \"Delay_Source\": \"Container\",\\n                \"StreamSize\": \"150552416\",\\n                \"StreamSize_Proportion\": \"0.13223\",\\n                \"Language\": \"en\",\\n                \"ServiceKind\": \"CM\",\\n                \"Default\": \"Yes\",\\n                \"Forced\": \"No\",\\n                \"extra\": {\\n                    \"bsid\": \"8\",\\n                    \"dialnorm\": \"-31\",\\n                    \"compr\": \"-0.28\",\\n                    \"acmod\": \"7\",\\n                    \"lfeon\": \"1\",\\n                    \"dialnorm_Average\": \"-31\",\\n                    \"dialnorm_Minimum\": \"-31\",\\n                    \"compr_Average\": \"0.42\",\\n                    \"compr_Minimum\": \"-2.50\",\\n                    \"compr_Maximum\": \"1.94\",\\n                    \"compr_Count\": \"334\",\\n                    \"dynrng_Average\": \"0.25\",\\n                    \"dynrng_Minimum\": \"-2.68\",\\n                    \"dynrng_Maximum\": \"1.94\",\\n                    \"dynrng_Count\": \"556\"\\n                }\\n            },\\n            {\\n                \"@type\": \"Menu\",\\n                \"extra\": {\\n                    \"_00_00_00_000\": \"en:(01)00:00:00:000\",\\n                    \"_00_44_48_769\": \"en:(02)00:44:48:769\"\\n                }\\n            }\\n        ]\\n    }\\n}\\n'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "base_dir = Path('/Users/josh/Projects/jupyter_test/MediaInfoTemplates')\n",
    "raw = ''\n",
    "filename = f\"{base_dir}/mkv.json\"\n",
    "print(f\"loading info from {filename}\")\n",
    "with open(filename) as f:\n",
    "    raw = f.read()\n",
    "\n",
    "# string.Formatter.format(mf)\n",
    "# mf = json.load(f)\n",
    "raw# .format(**locals())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'key': 'rec-500',\n",
       " 'letters': 'dgruaxbaaaaagaikwqabkxcanpilhwupedytitgdrwcrrckrhvocapkewubqaedycdvmdekmrlzxlotfjcfjcaibwbosfjwwlgpoyoljnfbhodarfhzidcyynijczqha',\n",
       " 'number': 500}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query by ID\n",
    "es.get(index=\"es_test\", id=\"rec-500\")['_source']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10 entries\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>key</th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-500</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-501</td>\n",
       "      <td>501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-502</td>\n",
       "      <td>502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-503</td>\n",
       "      <td>503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-504</td>\n",
       "      <td>504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-505</td>\n",
       "      <td>505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-506</td>\n",
       "      <td>506</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-507</td>\n",
       "      <td>507</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-508</td>\n",
       "      <td>508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1.0</td>\n",
       "      <td>rec-509</td>\n",
       "      <td>509</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   score      key  number\n",
       "0    1.0  rec-500     500\n",
       "1    1.0  rec-501     501\n",
       "2    1.0  rec-502     502\n",
       "3    1.0  rec-503     503\n",
       "4    1.0  rec-504     504\n",
       "5    1.0  rec-505     505\n",
       "6    1.0  rec-506     506\n",
       "7    1.0  rec-507     507\n",
       "8    1.0  rec-508     508\n",
       "9    1.0  rec-509     509"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search by length\n",
    "res = es.search(index=\"es_test\", body={\"query\": {\"range\": {\"number\": {\"gte\": 500, \"lte\": 510}}}})\n",
    "dat = []\n",
    "for hit in res['hits']['hits']:\n",
    "    dat.append({\n",
    "        'score': hit['_score'],\n",
    "        'key': hit['_source']['key'],\n",
    "        'number': hit['_source']['number'],\n",
    "    })\n",
    "\n",
    "print(f'{len(dat)} entries')\n",
    "pd.DataFrame(dat, columns=['score', 'key', 'number'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}