Created
April 22, 2013 19:03
-
-
Save tmcw/5437594 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
import psycopg2, argparse, sys, tarfile, getpass | |
from xml.parsers import expat | |
def create_tables(proj=4326): | |
cur = db.cursor() | |
cur.execute('drop table if exists gpx_data;') | |
cur.execute('drop table if exists gpx_info;') | |
cur.execute('drop type if exists gpx_visibility;') | |
cur.execute("create type gpx_visibility as enum( 'private', 'trackable', 'public', 'identifiable' )") | |
cur.execute("""create table gpx_info ( | |
gpx_id integer not null primary key, | |
visibility gpx_visibility not null, | |
gpx_date date, | |
uid integer, | |
user_name varchar(100), | |
description varchar(500) | |
);""") | |
cur.execute("""create table gpx_data ( | |
gpx_id integer not null references gpx_info (gpx_id), | |
segment_id integer not null, | |
track_date date, | |
track geometry(linestring, {0}) not null, | |
primary key (gpx_id, segment_id) | |
);""".format(proj)) | |
cur.close() | |
def create_index_and_vacuum(db): | |
cur = db.cursor() | |
cur.execute('create index on gpx_data (gpx_id);') | |
cur.execute('create index on gpx_data using gist (track);') | |
cur.close() | |
db.commit() | |
old_isolation = db.isolation_level | |
db.set_isolation_level(0) | |
cur = db.cursor() | |
cur.execute('vacuum analyze gpx_data;') | |
cur.close() | |
db.set_isolation_level(old_isolation) | |
def get_all_ids(db): | |
cur = db.cursor() | |
cur.execute('select gpx_id from gpx_info;') | |
gpxids = [rec[0] for rec in cur] | |
cur.close() | |
return gpxids | |
def get_fake_id(gpxids): | |
i = -1 | |
while i in gpxids: | |
i -= 1 | |
return i | |
def process_metadata(f): | |
metadata = {} | |
p = expat.ParserCreate() | |
def start_element(name, attrs): | |
if name == 'gpxFile': | |
m = {} | |
if attrs.has_key('visibility'): m['visibility'] = attrs['visibility'] | |
if attrs.has_key('user'): m['user'] = attrs['user'] | |
if attrs.has_key('id'): m['id'] = int(attrs['id']) | |
if attrs.has_key('uid'): m['uid'] = int(attrs['uid']) | |
if attrs.has_key('points'): m['points'] = int(attrs['points']) | |
if attrs.has_key('timestamp'): m['date'] = attrs['timestamp'] | |
metadata[attrs['filename']] = m | |
p.StartElementHandler = start_element | |
p.ParseFile(f) | |
return metadata | |
def store_metadata(db, info): | |
cur = db.cursor() | |
cur.execute('insert into gpx_info (gpx_id, visibility, gpx_date, uid, user_name, description) values (%s, %s, %s, %s, %s, %s)', | |
(info['id'], info['visibility'], info['date'], info.get('uid', None), info.get('user', None), info.get('description', None))) | |
cur.close() | |
geomfromtext = 'ST_GeomFromText(%s)' | |
geomfromtext = 'ST_Transform({0}, 900913)'.format(geomfromtext) | |
def process_gpx(db, gpx_id, f, options, cur): | |
p = expat.ParserCreate() | |
dmin = 1e-7 | |
dmax = 1e-3 | |
def start_element(name, attrs): | |
if name == 'trkpt': | |
last = (float(attrs['lon']), float(attrs['lat'])) | |
dist = abs(last[0] - start_element.last[0]) + abs(last[1] - start_element.last[1]) if start_element.last else dmin * 2 | |
if dist < dmax and dist > dmin and last[1] < 90 and last[1] > -90 and last[0] < 180 and last[0] > -180: | |
start_element.points.append(last) | |
start_element.last = last | |
elif name == 'trkseg': | |
start_element.points = [] | |
start_element.segment += 1 | |
elif name == 'time': | |
start_element.in_time = True | |
start_element.points = [] | |
start_element.last = False | |
start_element.segment = 0 | |
start_element.in_time = False | |
def char_data(d): | |
if start_element.in_time: | |
char_data.lastDate = d | |
start_element.in_time = False | |
char_data.lastDate = '' | |
def end_element(name): | |
if name == 'trkseg' and len(start_element.points) >= 5: | |
geom = 'SRID=4326;LINESTRING(' + ','.join(['{0} {1}'.format(x[0], x[1]) for x in start_element.points]) + ')' | |
cur.execute('insert into gpx_data (gpx_id, segment_id, track) values (%s, %s, {0})'.format(geomfromtext), | |
(gpx_id, start_element.segment, geom)) | |
p.StartElementHandler = start_element | |
p.EndElementHandler = end_element | |
p.CharacterDataHandler = char_data | |
p.ParseFile(f) | |
if __name__ == '__main__': | |
default_user = getpass.getuser() | |
parser = argparse.ArgumentParser(description='Loads OpenStreetMap GPX dump into a PostgreSQL database with PostGIS extension') | |
apg_input = parser.add_argument_group('Input') | |
apg_input.add_argument('-f', '--file', type=argparse.FileType('r'), help='a file to process', required=True) | |
apg_input.add_argument('-s', '--single', action='store_true', help='process a single GPX file') | |
apg_filter = parser.add_argument_group('Filter') | |
apg_filter.add_argument('--pmin', type=int, help='minimum number of points in a track segment (default: {0})'.format(10), default=10) | |
apg_db = parser.add_argument_group('Database') | |
apg_db.add_argument('-u', '--user', help='user name for db (default: {0})'.format(default_user), default=default_user) | |
apg_db.add_argument('-w', '--password', action='store_true', help='ask for password') | |
apg_db.add_argument('--host', help='database host', default='localhost') | |
apg_db.add_argument('--port', type=int, help='database port', default='5432') | |
apg_db.add_argument('-d', '--dbname', metavar='DB', help='database (default: gis)', default='gis') | |
apg_db.add_argument('-c', '--create-tables', dest='tables', action='store_true', help='recreate tables') | |
apg_db.add_argument('-9', '--900913', dest='reproject', action='store_true', help='reproject points to 900913 projection') | |
options = parser.parse_args() | |
if options.password: | |
passwd = getpass.getpass('Please enter database password: ') | |
try: | |
db = psycopg2.connect(database=options.dbname, user=options.user, password=passwd, host=options.host, port=options.port) | |
db.set_client_encoding('UTF8') | |
except Exception, e: | |
print "Error connecting to database: ", e | |
sys.exit(1) | |
if options.tables: | |
create_tables(900913 if options.reproject else 4326) | |
gpxids = get_all_ids(db) | |
if options.single: | |
gpxinfo = { 'id': get_fake_id(gpxids), 'visibility': 'trackable' } | |
store_metadata(db, gpxinfo) | |
process_gpx(db, gpxinfo['id'], options.file, options) | |
sys.exit(0) | |
cur = db.cursor() | |
tar = tarfile.open(fileobj=options.file, mode='r|') | |
i = 0 | |
cur.execute("begin transaction;") | |
for f in tar: | |
if 'metadata.xml' in f.name: | |
sys.stdout.write('Processing metadata') | |
sys.stdout.flush() | |
metadata = process_metadata(tar.extractfile(f)) | |
db.commit() | |
count = len(metadata) | |
print count | |
elif count and f.isfile and '.gpx' in f.name: | |
i += 1 | |
if i % 100 == 0: | |
db.commit() | |
for k, v in metadata.items(): | |
if k in f.name: | |
gpxinfo = v | |
if not gpxinfo: | |
gpxinfo = { 'id': get_fake_id(gpxids), 'visibility': 'trackable' } | |
if not gpxinfo['id'] in gpxids: | |
gpxids.append(gpxinfo['id']) | |
store_metadata(db, gpxinfo) | |
process_gpx(db, gpxinfo['id'], tar.extractfile(f), options, cur) | |
tar.members = [] | |
cur.close() | |
tar.close() | |
create_index_and_vacuum(db) | |
db.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment