Skip to content

Instantly share code, notes, and snippets.

@zed
Created October 16, 2011 16:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/1291081 to your computer and use it in GitHub Desktop.
Save zed/1291081 to your computer and use it in GitHub Desktop.
Find coords that are in a +/- step in sqlite
#!/usr/bin/env python
import random
import sys
import sqlite3
from pprint import pprint
try:
import numpy as np
from itertools import izip
def get_random_dataset(N):
return izip(np.random.uniform(-90, 90, N),
np.random.uniform(-180, 180, N))
except ImportError:
get_random_dataset = None
STEP = 0.041667
def get_small_dataset():
coords = """
55.02050000 13.02040000
56.02050000 14.02040000
56.02050000 13.02040000
56.000000 13.000000
56.000000 13.041667
56.041667 13.000000
56.041667 13.041667""".splitlines()
return (line.split() for line in coords if line.strip())
def create_populated_table(db, coords):
"""Create table coords and populate it with sample data."""
db.execute("drop table if exists coords")
db.execute("create table coords (lat real, lon real)")
with db: # insert all or nothing
db.executemany("insert into coords(lat, lon) values (?, ?)", coords)
def create_rtree_index(db, step):
"""For each point in the coords table put +/- step range into the index.
http://www.sqlite.org/rtree.html
"""
db.execute("drop table if exists coords_index")
db.execute("""CREATE VIRTUAL TABLE coords_index USING rtree(
id, -- Integer primary key
minlat, maxlat, -- Minimum and maximum latitude
minlon, maxlon -- Minimum and maximum longitude
)""")
# populate it with coords data
with db:
db.executemany("""insert into
coords_index(id, minlat, maxlat, minlon, maxlon)
values (?, ?, ?, ?, ?)""", (
#XXX disregard values near +/- 90 latitude, +/- 180 longitude
(id, lat-step, lat+step, lon-step, lon+step)
for id, lat, lon in db.execute("""select rowid, lat, lon
from coords""")))
def find_adjacent_coords(db, lat, lon, step):
"""Find coords that are in a +/- step range of lat, lon."""
#XXX disregard values near +/- 90 latitude, +/- 180 longitude
coords_range = lat-step, lat+step, lon-step, lon+step
return db.execute("""select lat, lon from coords
where lat >= ? and lat <= ?
and lon >= ? and lon <= ?""",
coords_range)
def find_adjacent_coords_rtree(db, lat, lon):
# find ranges that contain lat,lon point using SQLite R-Tree index
return db.execute("""select lat, lon from coords, coords_index
where coords.rowid=coords_index.id
and maxlat >= :lat and minlat <= :lat
and maxlon >= :lon and minlon <= :lon""",
dict(lat=lat, lon=lon))
def main(argv=None):
if argv is None:
argv = sys.argv
step = STEP
point = None
if len(argv) > 1:
# use existing database with populated index that uses given step
db = sqlite3.connect(argv[1])
if len(argv) > 2:
step = float(argv[2])
if len(argv) > 4:
point = argv[3:]
else:
db = create_test_db(10000)
if point is None:
point = random.choice(
db.execute("select lat, lon from coords limit 100").fetchall())
pprint(point)
pprint(find_adjacent_coords(db, *map(float,point), step=step).fetchall())
pprint(find_adjacent_coords_rtree(db, *point).fetchall())
def create_test_db(N):
# create random dataset in memory
db = sqlite3.connect(':memory:')
if get_random_dataset:
coords = get_random_dataset(N)
else:
coords = get_small_dataset()
create_populated_table(db, coords)
#HACK: increase step to include boundary points
create_rtree_index(db, STEP+1e-6)
return db
def test_find_adjacent_coords():
N = 10000
db = create_test_db(N)
for i, point in enumerate(db.execute("select lat, lon from coords"), 1):
a = find_adjacent_coords(db, *point, step=STEP).fetchall()
b = find_adjacent_coords_rtree(db, *point).fetchall()
assert sorted(a) == sorted(b), (point, a,b)
assert i == N, i
if __name__=="__main__":
test_find_adjacent_coords()
main()
# for 1e6 points:
# python -mcProfile daikini.py | grep find_adjacent
# 1 0.000 0.000 0.161 0.161 daikini.py:40(find_adjacent_coords)
# 1 0.000 0.000 0.000 0.000 daikini.py:48(find_adjacent_coords_rtree)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment