Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@FrancescAlted
Created October 24, 2012 17:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FrancescAlted/3947559 to your computer and use it in GitHub Desktop.
Save FrancescAlted/3947559 to your computer and use it in GitHub Desktop.
Query2 for PyTables tutorial
{
"metadata": {
"name": "Queries2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import tables as tb"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 128
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class Client(tb.IsDescription):\n",
" id = tb.IntCol()\n",
" name = tb.StringCol(200)\n",
" age = tb.Int8Col()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 250
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class Bill(tb.IsDescription):\n",
" client_id = tb.IntCol()\n",
" item_id = tb.IntCol()\n",
" price = tb.IntCol()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 251
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"f = tb.openFile('bills.h5', 'w')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 264
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"clients = f.createTable(f.root, 'clients', Client, 'Clients', filters=tb.Filters(1))\n",
"r = clients.row\n",
"for i in xrange(1000):\n",
" r['id'] = i # 1000 different clients\n",
" r['name'] = \"Client %d\" % i\n",
" r['age'] = (i % 80) + 21 # ages between 21 and 101\n",
" r.append()\n",
"clients.flush()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 265
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bills = f.createTable(f.root, 'bills', Bill, 'Bills', filters=tb.Filters(1))\n",
"r = bills.row\n",
"for i in xrange(1000*1000):\n",
" r['client_id'] = i % 1000 # less than 1000 clients\n",
" r['item_id'] = i % 100 # less than 100 items\n",
" r['price'] = i % 10 # an item cannot be more than $10\n",
" r.append()\n",
"bills.flush()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 266
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def query(age1, age2, pricethr):\n",
" totals = {}\n",
" for client in clients.where('(age >= %d) & (age < %d)' % (age1, age2)):\n",
" totals[client['name']] = 0\n",
" #for bill in bills.where('(client_id == cid) & (price > pricethr)', {'cid': client['id'], 'pricethr': pricethr}):\n",
" cid = client['id']\n",
" for bill in bills.where('(client_id == cid) & (price > pricethr)'):\n",
" totals[client['name']] += bill['item_id']\n",
" return dict((cid, value) for cid, value in totals.iteritems() if value > 0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 267
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%time totals = query(20, 30, 5)\n",
"print totals"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 0.23 s, sys: 0.01 s, total: 0.24 s\n",
"Wall time: 0.25 s\n",
"{'Client 728': 28000, 'Client 8': 8000, 'Client 86': 86000, 'Client 87': 87000, 'Client 326': 26000, 'Client 327': 27000, 'Client 247': 47000, 'Client 246': 46000, 'Client 726': 26000, 'Client 248': 48000, 'Client 328': 28000, 'Client 88': 88000, 'Client 807': 7000, 'Client 806': 6000, 'Client 647': 47000, 'Client 646': 46000, 'Client 168': 68000, 'Client 166': 66000, 'Client 167': 67000, 'Client 648': 48000, 'Client 808': 8000, 'Client 966': 66000, 'Client 967': 67000, 'Client 888': 88000, 'Client 887': 87000, 'Client 886': 86000, 'Client 968': 68000, 'Client 7': 7000, 'Client 6': 6000, 'Client 566': 66000, 'Client 567': 67000, 'Client 568': 68000, 'Client 727': 27000, 'Client 407': 7000, 'Client 406': 6000, 'Client 408': 8000, 'Client 487': 87000, 'Client 486': 86000, 'Client 488': 88000}\n"
]
}
],
"prompt_number": 274
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bills.cols.client_id.createCSIndex()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 269,
"text": [
"1000000"
]
}
],
"prompt_number": 269
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sbills = f.createTable(f.root, 'sbills', Bill, 'Sorted bills', filters=tb.Filters(1))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 271
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = sbills.row\n",
"for b in bills.itersorted('client_id'):\n",
" r['client_id'] = b['client_id']\n",
" r['item_id'] = b['item_id']\n",
" r['price'] = b['price']\n",
" r.append()\n",
"sbills.flush()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 272
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sbills.cols.client_id.createCSIndex()\n",
"bills = sbills # bills reference to new table"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 273
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"f.close()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 263
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"|"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment