Created
September 10, 2016 15:19
-
-
Save tilarids/7d01bdc205423998796ae81ef0b7e394 to your computer and use it in GitHub Desktop.
JSON + Pandas query demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import glob\n", | |
"\n", | |
"data = [json.load(open(x)) for x in glob.glob('/tmp/tenders/*/*.json')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import itertools\n", | |
"import pandas as pd\n", | |
"\n", | |
"def get_field(node, path):\n", | |
" if not path:\n", | |
" yield node\n", | |
" return\n", | |
" value = node.get(path[0], [])\n", | |
" if isinstance(value, list):\n", | |
" for v in value:\n", | |
" for x in get_field(v, path[1:]):\n", | |
" yield x\n", | |
" else:\n", | |
" for x in get_field(value, path[1:]):\n", | |
" yield x\n", | |
" \n", | |
"def query(data, *query_elems):\n", | |
" paths = [x.split('.') for x in query_elems]\n", | |
" def generator():\n", | |
" for elem in data:\n", | |
" row = []\n", | |
" for path in paths:\n", | |
" row.append(get_field(elem, path))\n", | |
" for x in itertools.product(*row):\n", | |
" yield x\n", | |
" return pd.DataFrame(generator(), columns=query_elems)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"q2 = query(data, 'tenderID', 'value.amount', 'status', 'bids.tenderers.name', 'bids.value.amount', \n", | |
" 'bids.tenderers.identifier.id', 'awards.value.amount')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tenderID</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>UA-2015-03-06-000009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>UA-2015-03-17-000014</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>UA-2015-03-17-000019</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>UA-2015-03-30-000001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>UA-2015-04-06-000007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>UA-2015-04-16-000015</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>UA-2015-04-17-000008</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>UA-2015-04-28-000003</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>UA-2015-04-28-000013</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>UA-2015-04-28-000033</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>UA-2015-04-29-000002</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>UA-2015-05-05-000024</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>UA-2015-05-05-000026</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>UA-2015-05-06-000023</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>UA-2015-05-07-000011</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>UA-2015-05-08-000006</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>UA-2015-05-08-000011</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>UA-2015-05-12-000001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>UA-2015-05-12-000009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>UA-2015-05-13-000001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>UA-2015-05-13-000002</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>UA-2015-05-14-000002</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>UA-2015-05-14-000025</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>UA-2015-05-14-000026</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>UA-2015-05-15-000001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>UA-2015-05-15-000002</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>UA-2015-05-18-000001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>UA-2015-05-18-000024</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>UA-2015-05-18-000026</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>UA-2015-05-18-000027</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7690</th>\n", | |
" <td>UA-2016-02-25-000246-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7691</th>\n", | |
" <td>UA-2016-02-25-000307-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7692</th>\n", | |
" <td>UA-2016-02-25-000338-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7693</th>\n", | |
" <td>UA-2016-02-25-000344-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7694</th>\n", | |
" <td>UA-2016-02-25-000377-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7695</th>\n", | |
" <td>UA-2016-02-26-000089-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7696</th>\n", | |
" <td>UA-2016-02-26-000098-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7697</th>\n", | |
" <td>UA-2016-02-26-000135-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7698</th>\n", | |
" <td>UA-2016-02-26-000166-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7699</th>\n", | |
" <td>UA-2016-02-26-000315-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7700</th>\n", | |
" <td>UA-2016-02-26-000329-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7701</th>\n", | |
" <td>UA-2016-02-26-000331-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7702</th>\n", | |
" <td>UA-2016-02-26-000332-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7703</th>\n", | |
" <td>UA-2016-02-26-000334-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7704</th>\n", | |
" <td>UA-2016-02-26-000343-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7705</th>\n", | |
" <td>UA-2016-02-28-000001-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7706</th>\n", | |
" <td>UA-2016-02-29-000086-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7707</th>\n", | |
" <td>UA-2016-02-29-000087-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7708</th>\n", | |
" <td>UA-2016-02-29-000131-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7709</th>\n", | |
" <td>UA-2016-02-29-000150-a</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7710</th>\n", | |
" <td>UA-2016-03-01-000110-a</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7711</th>\n", | |
" <td>UA-2016-03-01-000135-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7712</th>\n", | |
" <td>UA-2016-03-01-000195-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7713</th>\n", | |
" <td>UA-2016-03-02-000039-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7714</th>\n", | |
" <td>UA-2016-03-02-000051-a</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7715</th>\n", | |
" <td>UA-2016-03-02-000070-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7716</th>\n", | |
" <td>UA-2016-03-02-000119-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7717</th>\n", | |
" <td>UA-2016-03-02-000120-b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7718</th>\n", | |
" <td>UA-2016-03-02-000266-c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7719</th>\n", | |
" <td>UA-2016-03-03-000088-b</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>7720 rows × 1 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tenderID\n", | |
"0 UA-2015-03-06-000009\n", | |
"1 UA-2015-03-17-000014\n", | |
"2 UA-2015-03-17-000019\n", | |
"3 UA-2015-03-30-000001\n", | |
"4 UA-2015-04-06-000007\n", | |
"5 UA-2015-04-16-000015\n", | |
"6 UA-2015-04-17-000008\n", | |
"7 UA-2015-04-28-000003\n", | |
"8 UA-2015-04-28-000013\n", | |
"9 UA-2015-04-28-000033\n", | |
"10 UA-2015-04-29-000002\n", | |
"11 UA-2015-05-05-000024\n", | |
"12 UA-2015-05-05-000026\n", | |
"13 UA-2015-05-06-000023\n", | |
"14 UA-2015-05-07-000011\n", | |
"15 UA-2015-05-08-000006\n", | |
"16 UA-2015-05-08-000011\n", | |
"17 UA-2015-05-12-000001\n", | |
"18 UA-2015-05-12-000009\n", | |
"19 UA-2015-05-13-000001\n", | |
"20 UA-2015-05-13-000002\n", | |
"21 UA-2015-05-14-000002\n", | |
"22 UA-2015-05-14-000025\n", | |
"23 UA-2015-05-14-000026\n", | |
"24 UA-2015-05-15-000001\n", | |
"25 UA-2015-05-15-000002\n", | |
"26 UA-2015-05-18-000001\n", | |
"27 UA-2015-05-18-000024\n", | |
"28 UA-2015-05-18-000026\n", | |
"29 UA-2015-05-18-000027\n", | |
"... ...\n", | |
"7690 UA-2016-02-25-000246-b\n", | |
"7691 UA-2016-02-25-000307-b\n", | |
"7692 UA-2016-02-25-000338-b\n", | |
"7693 UA-2016-02-25-000344-b\n", | |
"7694 UA-2016-02-25-000377-b\n", | |
"7695 UA-2016-02-26-000089-b\n", | |
"7696 UA-2016-02-26-000098-c\n", | |
"7697 UA-2016-02-26-000135-b\n", | |
"7698 UA-2016-02-26-000166-b\n", | |
"7699 UA-2016-02-26-000315-b\n", | |
"7700 UA-2016-02-26-000329-b\n", | |
"7701 UA-2016-02-26-000331-b\n", | |
"7702 UA-2016-02-26-000332-b\n", | |
"7703 UA-2016-02-26-000334-b\n", | |
"7704 UA-2016-02-26-000343-b\n", | |
"7705 UA-2016-02-28-000001-b\n", | |
"7706 UA-2016-02-29-000086-b\n", | |
"7707 UA-2016-02-29-000087-c\n", | |
"7708 UA-2016-02-29-000131-c\n", | |
"7709 UA-2016-02-29-000150-a\n", | |
"7710 UA-2016-03-01-000110-a\n", | |
"7711 UA-2016-03-01-000135-c\n", | |
"7712 UA-2016-03-01-000195-c\n", | |
"7713 UA-2016-03-02-000039-c\n", | |
"7714 UA-2016-03-02-000051-a\n", | |
"7715 UA-2016-03-02-000070-c\n", | |
"7716 UA-2016-03-02-000119-c\n", | |
"7717 UA-2016-03-02-000120-b\n", | |
"7718 UA-2016-03-02-000266-c\n", | |
"7719 UA-2016-03-03-000088-b\n", | |
"\n", | |
"[7720 rows x 1 columns]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bid_counts2 = q2[q2['status']=='complete'].groupby(['tenderID'])['bids.tenderers.name'].count()\n", | |
"bid_counts2[bid_counts2 == 1].reset_index()[['tenderID']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment