Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@tilarids
Created September 10, 2016 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tilarids/7d01bdc205423998796ae81ef0b7e394 to your computer and use it in GitHub Desktop.
Save tilarids/7d01bdc205423998796ae81ef0b7e394 to your computer and use it in GitHub Desktop.
JSON + Pandas query demo
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import json\n",
"import glob\n",
"\n",
"data = [json.load(open(x)) for x in glob.glob('/tmp/tenders/*/*.json')]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import itertools\n",
"import pandas as pd\n",
"\n",
"def get_field(node, path):\n",
" if not path:\n",
" yield node\n",
" return\n",
" value = node.get(path[0], [])\n",
" if isinstance(value, list):\n",
" for v in value:\n",
" for x in get_field(v, path[1:]):\n",
" yield x\n",
" else:\n",
" for x in get_field(value, path[1:]):\n",
" yield x\n",
" \n",
"def query(data, *query_elems):\n",
" paths = [x.split('.') for x in query_elems]\n",
" def generator():\n",
" for elem in data:\n",
" row = []\n",
" for path in paths:\n",
" row.append(get_field(elem, path))\n",
" for x in itertools.product(*row):\n",
" yield x\n",
" return pd.DataFrame(generator(), columns=query_elems)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"q2 = query(data, 'tenderID', 'value.amount', 'status', 'bids.tenderers.name', 'bids.value.amount', \n",
" 'bids.tenderers.identifier.id', 'awards.value.amount')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tenderID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>UA-2015-03-06-000009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>UA-2015-03-17-000014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>UA-2015-03-17-000019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>UA-2015-03-30-000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>UA-2015-04-06-000007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>UA-2015-04-16-000015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>UA-2015-04-17-000008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>UA-2015-04-28-000003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>UA-2015-04-28-000013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>UA-2015-04-28-000033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>UA-2015-04-29-000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>UA-2015-05-05-000024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>UA-2015-05-05-000026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>UA-2015-05-06-000023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>UA-2015-05-07-000011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>UA-2015-05-08-000006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>UA-2015-05-08-000011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>UA-2015-05-12-000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>UA-2015-05-12-000009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>UA-2015-05-13-000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>UA-2015-05-13-000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>UA-2015-05-14-000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>UA-2015-05-14-000025</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>UA-2015-05-14-000026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>UA-2015-05-15-000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>UA-2015-05-15-000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>UA-2015-05-18-000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>UA-2015-05-18-000024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>UA-2015-05-18-000026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>UA-2015-05-18-000027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7690</th>\n",
" <td>UA-2016-02-25-000246-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7691</th>\n",
" <td>UA-2016-02-25-000307-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7692</th>\n",
" <td>UA-2016-02-25-000338-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7693</th>\n",
" <td>UA-2016-02-25-000344-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7694</th>\n",
" <td>UA-2016-02-25-000377-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7695</th>\n",
" <td>UA-2016-02-26-000089-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7696</th>\n",
" <td>UA-2016-02-26-000098-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7697</th>\n",
" <td>UA-2016-02-26-000135-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7698</th>\n",
" <td>UA-2016-02-26-000166-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7699</th>\n",
" <td>UA-2016-02-26-000315-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7700</th>\n",
" <td>UA-2016-02-26-000329-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7701</th>\n",
" <td>UA-2016-02-26-000331-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7702</th>\n",
" <td>UA-2016-02-26-000332-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7703</th>\n",
" <td>UA-2016-02-26-000334-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7704</th>\n",
" <td>UA-2016-02-26-000343-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7705</th>\n",
" <td>UA-2016-02-28-000001-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7706</th>\n",
" <td>UA-2016-02-29-000086-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7707</th>\n",
" <td>UA-2016-02-29-000087-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7708</th>\n",
" <td>UA-2016-02-29-000131-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7709</th>\n",
" <td>UA-2016-02-29-000150-a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7710</th>\n",
" <td>UA-2016-03-01-000110-a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7711</th>\n",
" <td>UA-2016-03-01-000135-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7712</th>\n",
" <td>UA-2016-03-01-000195-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7713</th>\n",
" <td>UA-2016-03-02-000039-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7714</th>\n",
" <td>UA-2016-03-02-000051-a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7715</th>\n",
" <td>UA-2016-03-02-000070-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7716</th>\n",
" <td>UA-2016-03-02-000119-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7717</th>\n",
" <td>UA-2016-03-02-000120-b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7718</th>\n",
" <td>UA-2016-03-02-000266-c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7719</th>\n",
" <td>UA-2016-03-03-000088-b</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7720 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" tenderID\n",
"0 UA-2015-03-06-000009\n",
"1 UA-2015-03-17-000014\n",
"2 UA-2015-03-17-000019\n",
"3 UA-2015-03-30-000001\n",
"4 UA-2015-04-06-000007\n",
"5 UA-2015-04-16-000015\n",
"6 UA-2015-04-17-000008\n",
"7 UA-2015-04-28-000003\n",
"8 UA-2015-04-28-000013\n",
"9 UA-2015-04-28-000033\n",
"10 UA-2015-04-29-000002\n",
"11 UA-2015-05-05-000024\n",
"12 UA-2015-05-05-000026\n",
"13 UA-2015-05-06-000023\n",
"14 UA-2015-05-07-000011\n",
"15 UA-2015-05-08-000006\n",
"16 UA-2015-05-08-000011\n",
"17 UA-2015-05-12-000001\n",
"18 UA-2015-05-12-000009\n",
"19 UA-2015-05-13-000001\n",
"20 UA-2015-05-13-000002\n",
"21 UA-2015-05-14-000002\n",
"22 UA-2015-05-14-000025\n",
"23 UA-2015-05-14-000026\n",
"24 UA-2015-05-15-000001\n",
"25 UA-2015-05-15-000002\n",
"26 UA-2015-05-18-000001\n",
"27 UA-2015-05-18-000024\n",
"28 UA-2015-05-18-000026\n",
"29 UA-2015-05-18-000027\n",
"... ...\n",
"7690 UA-2016-02-25-000246-b\n",
"7691 UA-2016-02-25-000307-b\n",
"7692 UA-2016-02-25-000338-b\n",
"7693 UA-2016-02-25-000344-b\n",
"7694 UA-2016-02-25-000377-b\n",
"7695 UA-2016-02-26-000089-b\n",
"7696 UA-2016-02-26-000098-c\n",
"7697 UA-2016-02-26-000135-b\n",
"7698 UA-2016-02-26-000166-b\n",
"7699 UA-2016-02-26-000315-b\n",
"7700 UA-2016-02-26-000329-b\n",
"7701 UA-2016-02-26-000331-b\n",
"7702 UA-2016-02-26-000332-b\n",
"7703 UA-2016-02-26-000334-b\n",
"7704 UA-2016-02-26-000343-b\n",
"7705 UA-2016-02-28-000001-b\n",
"7706 UA-2016-02-29-000086-b\n",
"7707 UA-2016-02-29-000087-c\n",
"7708 UA-2016-02-29-000131-c\n",
"7709 UA-2016-02-29-000150-a\n",
"7710 UA-2016-03-01-000110-a\n",
"7711 UA-2016-03-01-000135-c\n",
"7712 UA-2016-03-01-000195-c\n",
"7713 UA-2016-03-02-000039-c\n",
"7714 UA-2016-03-02-000051-a\n",
"7715 UA-2016-03-02-000070-c\n",
"7716 UA-2016-03-02-000119-c\n",
"7717 UA-2016-03-02-000120-b\n",
"7718 UA-2016-03-02-000266-c\n",
"7719 UA-2016-03-03-000088-b\n",
"\n",
"[7720 rows x 1 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bid_counts2 = q2[q2['status']=='complete'].groupby(['tenderID'])['bids.tenderers.name'].count()\n",
"bid_counts2[bid_counts2 == 1].reset_index()[['tenderID']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment