Skip to content

Instantly share code, notes, and snippets.

@tmcw
Created April 22, 2013 19:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tmcw/5437594 to your computer and use it in GitHub Desktop.
Save tmcw/5437594 to your computer and use it in GitHub Desktop.
#!/usr/bin/python2
import psycopg2, argparse, sys, tarfile, getpass
from xml.parsers import expat
def create_tables(proj=4326):
cur = db.cursor()
cur.execute('drop table if exists gpx_data;')
cur.execute('drop table if exists gpx_info;')
cur.execute('drop type if exists gpx_visibility;')
cur.execute("create type gpx_visibility as enum( 'private', 'trackable', 'public', 'identifiable' )")
cur.execute("""create table gpx_info (
gpx_id integer not null primary key,
visibility gpx_visibility not null,
gpx_date date,
uid integer,
user_name varchar(100),
description varchar(500)
);""")
cur.execute("""create table gpx_data (
gpx_id integer not null references gpx_info (gpx_id),
segment_id integer not null,
track_date date,
track geometry(linestring, {0}) not null,
primary key (gpx_id, segment_id)
);""".format(proj))
cur.close()
def create_index_and_vacuum(db):
cur = db.cursor()
cur.execute('create index on gpx_data (gpx_id);')
cur.execute('create index on gpx_data using gist (track);')
cur.close()
db.commit()
old_isolation = db.isolation_level
db.set_isolation_level(0)
cur = db.cursor()
cur.execute('vacuum analyze gpx_data;')
cur.close()
db.set_isolation_level(old_isolation)
def get_all_ids(db):
cur = db.cursor()
cur.execute('select gpx_id from gpx_info;')
gpxids = [rec[0] for rec in cur]
cur.close()
return gpxids
def get_fake_id(gpxids):
i = -1
while i in gpxids:
i -= 1
return i
def process_metadata(f):
metadata = {}
p = expat.ParserCreate()
def start_element(name, attrs):
if name == 'gpxFile':
m = {}
if attrs.has_key('visibility'): m['visibility'] = attrs['visibility']
if attrs.has_key('user'): m['user'] = attrs['user']
if attrs.has_key('id'): m['id'] = int(attrs['id'])
if attrs.has_key('uid'): m['uid'] = int(attrs['uid'])
if attrs.has_key('points'): m['points'] = int(attrs['points'])
if attrs.has_key('timestamp'): m['date'] = attrs['timestamp']
metadata[attrs['filename']] = m
p.StartElementHandler = start_element
p.ParseFile(f)
return metadata
def store_metadata(db, info):
cur = db.cursor()
cur.execute('insert into gpx_info (gpx_id, visibility, gpx_date, uid, user_name, description) values (%s, %s, %s, %s, %s, %s)',
(info['id'], info['visibility'], info['date'], info.get('uid', None), info.get('user', None), info.get('description', None)))
cur.close()
geomfromtext = 'ST_GeomFromText(%s)'
geomfromtext = 'ST_Transform({0}, 900913)'.format(geomfromtext)
def process_gpx(db, gpx_id, f, options, cur):
p = expat.ParserCreate()
dmin = 1e-7
dmax = 1e-3
def start_element(name, attrs):
if name == 'trkpt':
last = (float(attrs['lon']), float(attrs['lat']))
dist = abs(last[0] - start_element.last[0]) + abs(last[1] - start_element.last[1]) if start_element.last else dmin * 2
if dist < dmax and dist > dmin and last[1] < 90 and last[1] > -90 and last[0] < 180 and last[0] > -180:
start_element.points.append(last)
start_element.last = last
elif name == 'trkseg':
start_element.points = []
start_element.segment += 1
elif name == 'time':
start_element.in_time = True
start_element.points = []
start_element.last = False
start_element.segment = 0
start_element.in_time = False
def char_data(d):
if start_element.in_time:
char_data.lastDate = d
start_element.in_time = False
char_data.lastDate = ''
def end_element(name):
if name == 'trkseg' and len(start_element.points) >= 5:
geom = 'SRID=4326;LINESTRING(' + ','.join(['{0} {1}'.format(x[0], x[1]) for x in start_element.points]) + ')'
cur.execute('insert into gpx_data (gpx_id, segment_id, track) values (%s, %s, {0})'.format(geomfromtext),
(gpx_id, start_element.segment, geom))
p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data
p.ParseFile(f)
if __name__ == '__main__':
default_user = getpass.getuser()
parser = argparse.ArgumentParser(description='Loads OpenStreetMap GPX dump into a PostgreSQL database with PostGIS extension')
apg_input = parser.add_argument_group('Input')
apg_input.add_argument('-f', '--file', type=argparse.FileType('r'), help='a file to process', required=True)
apg_input.add_argument('-s', '--single', action='store_true', help='process a single GPX file')
apg_filter = parser.add_argument_group('Filter')
apg_filter.add_argument('--pmin', type=int, help='minimum number of points in a track segment (default: {0})'.format(10), default=10)
apg_db = parser.add_argument_group('Database')
apg_db.add_argument('-u', '--user', help='user name for db (default: {0})'.format(default_user), default=default_user)
apg_db.add_argument('-w', '--password', action='store_true', help='ask for password')
apg_db.add_argument('--host', help='database host', default='localhost')
apg_db.add_argument('--port', type=int, help='database port', default='5432')
apg_db.add_argument('-d', '--dbname', metavar='DB', help='database (default: gis)', default='gis')
apg_db.add_argument('-c', '--create-tables', dest='tables', action='store_true', help='recreate tables')
apg_db.add_argument('-9', '--900913', dest='reproject', action='store_true', help='reproject points to 900913 projection')
options = parser.parse_args()
if options.password:
passwd = getpass.getpass('Please enter database password: ')
try:
db = psycopg2.connect(database=options.dbname, user=options.user, password=passwd, host=options.host, port=options.port)
db.set_client_encoding('UTF8')
except Exception, e:
print "Error connecting to database: ", e
sys.exit(1)
if options.tables:
create_tables(900913 if options.reproject else 4326)
gpxids = get_all_ids(db)
if options.single:
gpxinfo = { 'id': get_fake_id(gpxids), 'visibility': 'trackable' }
store_metadata(db, gpxinfo)
process_gpx(db, gpxinfo['id'], options.file, options)
sys.exit(0)
cur = db.cursor()
tar = tarfile.open(fileobj=options.file, mode='r|')
i = 0
cur.execute("begin transaction;")
for f in tar:
if 'metadata.xml' in f.name:
sys.stdout.write('Processing metadata')
sys.stdout.flush()
metadata = process_metadata(tar.extractfile(f))
db.commit()
count = len(metadata)
print count
elif count and f.isfile and '.gpx' in f.name:
i += 1
if i % 100 == 0:
db.commit()
for k, v in metadata.items():
if k in f.name:
gpxinfo = v
if not gpxinfo:
gpxinfo = { 'id': get_fake_id(gpxids), 'visibility': 'trackable' }
if not gpxinfo['id'] in gpxids:
gpxids.append(gpxinfo['id'])
store_metadata(db, gpxinfo)
process_gpx(db, gpxinfo['id'], tar.extractfile(f), options, cur)
tar.members = []
cur.close()
tar.close()
create_index_and_vacuum(db)
db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment