Created
October 24, 2012 17:39
-
-
Save FrancescAlted/3947559 to your computer and use it in GitHub Desktop.
Query2 for PyTables tutorial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Queries2" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import tables as tb" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 128 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"class Client(tb.IsDescription):\n", | |
" id = tb.IntCol()\n", | |
" name = tb.StringCol(200)\n", | |
" age = tb.Int8Col()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 250 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"class Bill(tb.IsDescription):\n", | |
" client_id = tb.IntCol()\n", | |
" item_id = tb.IntCol()\n", | |
" price = tb.IntCol()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 251 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"f = tb.openFile('bills.h5', 'w')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 264 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"clients = f.createTable(f.root, 'clients', Client, 'Clients', filters=tb.Filters(1))\n", | |
"r = clients.row\n", | |
"for i in xrange(1000):\n", | |
" r['id'] = i # 1000 different clients\n", | |
" r['name'] = \"Client %d\" % i\n", | |
" r['age'] = (i % 80) + 21 # ages between 21 and 101\n", | |
" r.append()\n", | |
"clients.flush()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 265 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"bills = f.createTable(f.root, 'bills', Bill, 'Bills', filters=tb.Filters(1))\n", | |
"r = bills.row\n", | |
"for i in xrange(1000*1000):\n", | |
" r['client_id'] = i % 1000 # less than 1000 clients\n", | |
" r['item_id'] = i % 100 # less than 100 items\n", | |
" r['price'] = i % 10 # an item cannot be more than $10\n", | |
" r.append()\n", | |
"bills.flush()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 266 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def query(age1, age2, pricethr):\n", | |
" totals = {}\n", | |
" for client in clients.where('(age >= %d) & (age < %d)' % (age1, age2)):\n", | |
" totals[client['name']] = 0\n", | |
" #for bill in bills.where('(client_id == cid) & (price > pricethr)', {'cid': client['id'], 'pricethr': pricethr}):\n", | |
" cid = client['id']\n", | |
" for bill in bills.where('(client_id == cid) & (price > pricethr)'):\n", | |
" totals[client['name']] += bill['item_id']\n", | |
" return dict((cid, value) for cid, value in totals.iteritems() if value > 0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 267 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%time totals = query(20, 30, 5)\n", | |
"print totals" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"CPU times: user 0.23 s, sys: 0.01 s, total: 0.24 s\n", | |
"Wall time: 0.25 s\n", | |
"{'Client 728': 28000, 'Client 8': 8000, 'Client 86': 86000, 'Client 87': 87000, 'Client 326': 26000, 'Client 327': 27000, 'Client 247': 47000, 'Client 246': 46000, 'Client 726': 26000, 'Client 248': 48000, 'Client 328': 28000, 'Client 88': 88000, 'Client 807': 7000, 'Client 806': 6000, 'Client 647': 47000, 'Client 646': 46000, 'Client 168': 68000, 'Client 166': 66000, 'Client 167': 67000, 'Client 648': 48000, 'Client 808': 8000, 'Client 966': 66000, 'Client 967': 67000, 'Client 888': 88000, 'Client 887': 87000, 'Client 886': 86000, 'Client 968': 68000, 'Client 7': 7000, 'Client 6': 6000, 'Client 566': 66000, 'Client 567': 67000, 'Client 568': 68000, 'Client 727': 27000, 'Client 407': 7000, 'Client 406': 6000, 'Client 408': 8000, 'Client 487': 87000, 'Client 486': 86000, 'Client 488': 88000}\n" | |
] | |
} | |
], | |
"prompt_number": 274 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"bills.cols.client_id.createCSIndex()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 269, | |
"text": [ | |
"1000000" | |
] | |
} | |
], | |
"prompt_number": 269 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"sbills = f.createTable(f.root, 'sbills', Bill, 'Sorted bills', filters=tb.Filters(1))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 271 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"r = sbills.row\n", | |
"for b in bills.itersorted('client_id'):\n", | |
" r['client_id'] = b['client_id']\n", | |
" r['item_id'] = b['item_id']\n", | |
" r['price'] = b['price']\n", | |
" r.append()\n", | |
"sbills.flush()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 272 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"sbills.cols.client_id.createCSIndex()\n", | |
"bills = sbills # bills reference to new table" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 273 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"f.close()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 263 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"|" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment