drorata/gist:a8eccd3e07c2f72970b2

## gistfile1.json
{
 "metadata": {
  "name": "",
  "signature": "sha256:d5b3e958f7624194296f7f343c6dd48ae49eb42a47ccc694f35bc575a2bd23d7"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Processing Elasticsearch buckets"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "When using the 'terms' aggregation of Elasticsearch, the returned value is a list of buckets. Something like:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sample bucket list, shaped like the output of an Elasticsearch 'terms' aggregation.\n",
      "x = [\n",
      "    {\"key\": \"John\", \"doc_count\": 174736},\n",
      "    {\"key\": \"Martin\", \"doc_count\": 37789},\n",
      "    {\"key\": \"Lev\", \"doc_count\": 10261},\n",
      "    {\"key\": \"Joel\", \"doc_count\": 8638},\n",
      "    {\"key\": \"Smith\", \"doc_count\": 6672},\n",
      "]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "This output might be hard to work with. It would be nice to convert it into something more friendly."
     ]
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Using <code>tuple</code>s (you can replace tuples with <code>list</code>s):"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Flatten the buckets into a single tuple: key, doc_count, key, doc_count, ...\n",
      "tuple(value for bucket in x for value in (bucket['key'], bucket['doc_count']))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "('John', 174736, 'Martin', 37789, 'Lev', 10261, 'Joel', 8638, 'Smith', 6672)"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Build one (key, doc_count) pair per bucket, keeping the order of x.\n",
      "tuple((bucket['key'], bucket['doc_count']) for bucket in x)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "(('John', 174736),\n",
        " ('Martin', 37789),\n",
        " ('Lev', 10261),\n",
        " ('Joel', 8638),\n",
        " ('Smith', 6672))"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Using dictionaries"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Map each bucket's key to its document count.\n",
      "dict((bucket['key'], bucket['doc_count']) for bucket in x)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "{'Joel': 8638, 'John': 174736, 'Lev': 10261, 'Martin': 37789, 'Smith': 6672}"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Note that after switching to a dictionary the original order of the buckets may be lost (dictionaries only guarantee insertion order from Python 3.7 on)! This can be fixed by sorting with pandas; see next."
     ]
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Sorting using Pandas"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Series.order() was deprecated in pandas 0.17 and later removed; sort_values() replaces it.\n",
      "pd.Series(\n",
      "    {dic['key']: dic['doc_count'] for dic in x}\n",
      ").sort_values(ascending=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "John      174736\n",
        "Martin     37789\n",
        "Lev        10261\n",
        "Joel        8638\n",
        "Smith       6672\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 6
    }
   ],
   "metadata": {}
  }
 ]
}
	{
	"metadata": {
	"name": "",
	"signature": "sha256:d5b3e958f7624194296f7f343c6dd48ae49eb42a47ccc694f35bc575a2bd23d7"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "heading",
	"level": 1,
	"metadata": {},
	"source": [
	"Processing Elasticsearch buckets"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"When using the 'terms' aggregation of Elasticsearch, the returned value is a list of buckets. Something like:"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Sample bucket list, shaped like the output of an Elasticsearch 'terms' aggregation.\n",
	"x = [\n",
	"    {\n",
	"        \"key\": \"John\",\n",
	"        \"doc_count\": 174736\n",
	"    },\n",
	"    {\n",
	"        \"key\": \"Martin\",\n",
	"        \"doc_count\": 37789\n",
	"    },\n",
	"    {\n",
	"        \"key\": \"Lev\",\n",
	"        \"doc_count\": 10261\n",
	"    },\n",
	"    {\n",
	"        \"key\": \"Joel\",\n",
	"        \"doc_count\": 8638\n",
	"    },\n",
	"    {\n",
	"        \"key\": \"Smith\",\n",
	"        \"doc_count\": 6672\n",
	"    }\n",
	"]"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 1
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This output might be hard to work with. It would be nice to convert it into something more friendly."
	]
	},
	{
	"cell_type": "heading",
	"level": 6,
	"metadata": {},
	"source": [
	"Using <code>tuple</code>s (you can replace tuples with <code>list</code>s):"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Flatten the buckets into a single tuple: key, doc_count, key, doc_count, ...\n",
	"tuple(value for bucket in x for value in (bucket['key'], bucket['doc_count']))"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"metadata": {},
	"output_type": "pyout",
	"prompt_number": 2,
	"text": [
	"('John', 174736, 'Martin', 37789, 'Lev', 10261, 'Joel', 8638, 'Smith', 6672)"
	]
	}
	],
	"prompt_number": 2
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Build one (key, doc_count) pair per bucket, keeping the order of x.\n",
	"tuple((bucket['key'], bucket['doc_count']) for bucket in x)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"metadata": {},
	"output_type": "pyout",
	"prompt_number": 3,
	"text": [
	"(('John', 174736),\n",
	" ('Martin', 37789),\n",
	" ('Lev', 10261),\n",
	" ('Joel', 8638),\n",
	" ('Smith', 6672))"
	]
	}
	],
	"prompt_number": 3
	},
	{
	"cell_type": "heading",
	"level": 6,
	"metadata": {},
	"source": [
	"Using dictionaries"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Map each bucket's key to its document count.\n",
	"dict((bucket['key'], bucket['doc_count']) for bucket in x)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"metadata": {},
	"output_type": "pyout",
	"prompt_number": 4,
	"text": [
	"{'Joel': 8638, 'John': 174736, 'Lev': 10261, 'Martin': 37789, 'Smith': 6672}"
	]
	}
	],
	"prompt_number": 4
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Note that after switching to a dictionary the original order of the buckets may be lost (dictionaries only guarantee insertion order from Python 3.7 on)! This can be fixed by sorting with pandas; see next."
	]
	},
	{
	"cell_type": "heading",
	"level": 6,
	"metadata": {},
	"source": [
	"Sorting using Pandas"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import pandas as pd"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 5
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Series.order() was deprecated in pandas 0.17 and later removed; sort_values() replaces it.\n",
	"pd.Series(\n",
	"    {dic['key']: dic['doc_count'] for dic in x}\n",
	").sort_values(ascending=False)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"metadata": {},
	"output_type": "pyout",
	"prompt_number": 6,
	"text": [
	"John      174736\n",
	"Martin     37789\n",
	"Lev        10261\n",
	"Joel        8638\n",
	"Smith       6672\n",
	"dtype: int64"
	]
	}
	],
	"prompt_number": 6
	}
	],
	"metadata": {}
	}
	]
	}