Skip to content

Instantly share code, notes, and snippets.

@statcompute
Created November 24, 2018 06:43
Show Gist options
  • Save statcompute/aa2b43a03ac2ba683805cd40b7900dcd to your computer and use it in GitHub Desktop.
Save statcompute/aa2b43a03ac2ba683805cd40b7900dcd to your computer and use it in GitHub Desktop.
Manipulating dictionary list with SQLite back-end
from astropy.io.ascii import read
selected = ["origin", "dep_delay", "distance"]
csv = read("Downloads/nycflights.csv", format = 'csv', data_end = 11)[selected]
lst = map(lambda x: dict(zip(x.colnames, x)), csv)
from dataset import connect
### CREATE IN-MEMORY SQLITE DB ###
db = connect('sqlite:///:memory:', row_type = dict)
tbl = db.create_table("tbl", primary_id = False)
tbl.insert_many(lst)
list(db.query("select * from tbl limit 3"))
# [{u'dep_delay': 2, u'distance': 1400, u'origin': u'EWR'},
# {u'dep_delay': 4, u'distance': 1416, u'origin': u'LGA'},
# {u'dep_delay': 2, u'distance': 1089, u'origin': u'JFK'}]
sum1 = db.create_table("sum1", primary_id = False)
from numpy import nanmedian
sum1.insert_many(
map(lambda x: dict(origin = x,
med_dist = nanmedian([i["distance"] for i in
db.query("select distance from tbl where origin = :origin", {"origin": x})])),
[i["origin"] for i in db.query("select distinct origin from tbl")]))
sum2 = db.create_table("sum2", primary_id = False)
sum2.insert_many(list(db.query("select origin, ROUND(AVG(dep_delay), 2) as avg_delay from tbl group by origin")))
list(db.query("select a.*, b.avg_delay from sum1 as a, sum2 as b where a.origin = b.origin"))
#[{u'avg_delay': -2.33, u'med_dist': 1065.0, u'origin': u'EWR'},
# {u'avg_delay': -1.75, u'med_dist': 747.5, u'origin': u'LGA'},
# {u'avg_delay': -0.67, u'med_dist': 1089.0, u'origin': u'JFK'}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment