Created
October 16, 2011 16:09
-
-
Save zed/1291081 to your computer and use it in GitHub Desktop.
Find coords that are in a +/- step in sqlite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import random | |
import sys | |
import sqlite3 | |
from pprint import pprint | |
try: | |
import numpy as np | |
from itertools import izip | |
def get_random_dataset(N): | |
return izip(np.random.uniform(-90, 90, N), | |
np.random.uniform(-180, 180, N)) | |
except ImportError: | |
get_random_dataset = None | |
STEP = 0.041667 | |
def get_small_dataset(): | |
coords = """ | |
55.02050000 13.02040000 | |
56.02050000 14.02040000 | |
56.02050000 13.02040000 | |
56.000000 13.000000 | |
56.000000 13.041667 | |
56.041667 13.000000 | |
56.041667 13.041667""".splitlines() | |
return (line.split() for line in coords if line.strip()) | |
def create_populated_table(db, coords): | |
"""Create table coords and populate it with sample data.""" | |
db.execute("drop table if exists coords") | |
db.execute("create table coords (lat real, lon real)") | |
with db: # insert all or nothing | |
db.executemany("insert into coords(lat, lon) values (?, ?)", coords) | |
def create_rtree_index(db, step): | |
"""For each point in the coords table put +/- step range into the index. | |
http://www.sqlite.org/rtree.html | |
""" | |
db.execute("drop table if exists coords_index") | |
db.execute("""CREATE VIRTUAL TABLE coords_index USING rtree( | |
id, -- Integer primary key | |
minlat, maxlat, -- Minimum and maximum latitude | |
minlon, maxlon -- Minimum and maximum longitude | |
)""") | |
# populate it with coords data | |
with db: | |
db.executemany("""insert into | |
coords_index(id, minlat, maxlat, minlon, maxlon) | |
values (?, ?, ?, ?, ?)""", ( | |
#XXX disregard values near +/- 90 latitude, +/- 180 longitude | |
(id, lat-step, lat+step, lon-step, lon+step) | |
for id, lat, lon in db.execute("""select rowid, lat, lon | |
from coords"""))) | |
def find_adjacent_coords(db, lat, lon, step): | |
"""Find coords that are in a +/- step range of lat, lon.""" | |
#XXX disregard values near +/- 90 latitude, +/- 180 longitude | |
coords_range = lat-step, lat+step, lon-step, lon+step | |
return db.execute("""select lat, lon from coords | |
where lat >= ? and lat <= ? | |
and lon >= ? and lon <= ?""", | |
coords_range) | |
def find_adjacent_coords_rtree(db, lat, lon): | |
# find ranges that contain lat,lon point using SQLite R-Tree index | |
return db.execute("""select lat, lon from coords, coords_index | |
where coords.rowid=coords_index.id | |
and maxlat >= :lat and minlat <= :lat | |
and maxlon >= :lon and minlon <= :lon""", | |
dict(lat=lat, lon=lon)) | |
def main(argv=None): | |
if argv is None: | |
argv = sys.argv | |
step = STEP | |
point = None | |
if len(argv) > 1: | |
# use existing database with populated index that uses given step | |
db = sqlite3.connect(argv[1]) | |
if len(argv) > 2: | |
step = float(argv[2]) | |
if len(argv) > 4: | |
point = argv[3:] | |
else: | |
db = create_test_db(10000) | |
if point is None: | |
point = random.choice( | |
db.execute("select lat, lon from coords limit 100").fetchall()) | |
pprint(point) | |
pprint(find_adjacent_coords(db, *map(float,point), step=step).fetchall()) | |
pprint(find_adjacent_coords_rtree(db, *point).fetchall()) | |
def create_test_db(N): | |
# create random dataset in memory | |
db = sqlite3.connect(':memory:') | |
if get_random_dataset: | |
coords = get_random_dataset(N) | |
else: | |
coords = get_small_dataset() | |
create_populated_table(db, coords) | |
#HACK: increase step to include boundary points | |
create_rtree_index(db, STEP+1e-6) | |
return db | |
def test_find_adjacent_coords(): | |
N = 10000 | |
db = create_test_db(N) | |
for i, point in enumerate(db.execute("select lat, lon from coords"), 1): | |
a = find_adjacent_coords(db, *point, step=STEP).fetchall() | |
b = find_adjacent_coords_rtree(db, *point).fetchall() | |
assert sorted(a) == sorted(b), (point, a,b) | |
assert i == N, i | |
if __name__=="__main__": | |
test_find_adjacent_coords() | |
main() | |
# for 1e6 points: | |
# python -mcProfile daikini.py | grep find_adjacent | |
# 1 0.000 0.000 0.161 0.161 daikini.py:40(find_adjacent_coords) | |
# 1 0.000 0.000 0.000 0.000 daikini.py:48(find_adjacent_coords_rtree) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment