Skip to content

Instantly share code, notes, and snippets.

@mdda
Created July 18, 2014 08:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mdda/a11f7d9ce1143da4146b to your computer and use it in GitHub Desktop.
Save mdda/a11f7d9ce1143da4146b to your computer and use it in GitHub Desktop.
CSV-to-sqlite importer with simple DSL to map ports 'AIS' data
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sqlite3
import re
import sys, csv
import datetime, time
db_file = "ports.db"
#vslRecID_n,vsl_m,vslTrgtPosnLat_q,vslTrgtPosnLong_q,vslCourse_q,vslTy_c,vslLen_q,vslGT_q,vslBre_q,vslSpeed_q, timeStamp_dt
#52589,, 1.308571696, 103.7263107, 0, TU, 54, 1373, 0, 0, Mar 31 2014 12:01:46:270AM
#68346,, 1.289833188, 104.0870514, 84.72840881,TU, 19, 89, 0, 2.988201141,Mar 31 2014 12:14:02:826AM
structure_ais = dict(
table="AIS",
skip_first=True,
ignore_zero_vid=True,
fields=[
"vslRecID=vid INT", # - vessel ID. 0 means we're missing the vessel receiver ID
"vsl_m=0 CHAR(1)", # - vessel manager but unused
"vslTrgtPosnLat_q=lat REAL", # - vessel latitude
"vslTrgtPosnLong_q=lon REAL", # - vessel longitude
"vslCourse_q=course REAL", # - vessel direction
"vslTy_c=country CHAR(2)", # - vessel country of registration
"vslLen_q=l INT", # - vessel length
"vslGT_q=gt INT", # - gross tonnage (e.g. 6000 means volume in cubic feet)
"vslBre_q=beam INT", # - vessel beam
"vslSpeed_q=speed REAL", # - vessel speed in knots at the time of measurement
"timeStamp_dt=ts TS_AIS" # - timestamp when this record was made
],
structure_csv=[],
fields_sql=[],
create_sql="",
)
class CSVtoSQLite():
#db_create_real = re.sub(r"\#.*?\n", "\n", """
DB_HINTS_PARSE = r"^(.*?)\=(.*?)\s+(.*)"
def __init__(self, db_structure):
sql_create=[]
sql_fields=[]
sql_places=[]
structure=[]
for c in db_structure['fields']:
m = re.match(self.DB_HINTS_PARSE, c)
if m is not None:
(csv_field, sql_field, typ) = (m.group(1), m.group(2), m.group(3) )
structure.append( dict(csv = csv_field, sql = sql_field, typ = typ) )
if sql_field == "0": continue
if typ == "TS_AIS": typ = "INT" # Conversion will happen below
print "%s - %s - %s" % (csv_field, sql_field, typ)
sql_create.append("%s %s" % (sql_field, typ))
sql_fields.append(sql_field)
sql_places.append('?')
db_structure['structure_csv'] = structure
db_structure['create_sql'] = ("CREATE TABLE %s (" % (db_structure['table'])) + ", \n".join(sql_create) + (");")
db_structure['fields_sql'] = sql_fields
db_structure['places_sql'] = sql_places
self.db_structure = db_structure
def create_if_not_there(self):
r = connection.execute("SELECT * FROM sqlite_master WHERE type = 'table' AND name = ?", (self.db_structure['table'],))
if r.fetchone() is None:
connection.execute(self.db_structure['create_sql'])
def load_csv(self, filename):
#reader = csv.DictReader(open(filename))
reader = csv.reader(open(filename))
if self.db_structure['skip_first']:
throwaway = reader.next()
def return_place(field): return '?'
cols = ','.join(self.db_structure['fields_sql'])
places = ','.join(self.db_structure['places_sql'])
insert = "INSERT INTO %s (%s) VALUES (%s)" % (self.db_structure['table'], cols, places)
#print insert
for row in reader:
entry = []
skip_row = False
for (c,v) in zip(self.db_structure['structure_csv'], row):
if c['sql'] == "0": continue
if c['sql'] == "vid" and v=="0" and self.db_structure['ignore_zero_vid']: skip_row=True
# https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
if c['typ'] == "TS_AIS":
#print "DATE:" + v
ts = datetime.datetime.strptime(v, "%b %d %Y %I:%M:%S:%f%p") # 'Apr 30 2014 12:00:42:240AM'
#v = "datetime(%s, 'unixepoch')" % (time.mktime(ts.timetuple()),)
v = time.mktime(ts.timetuple())
entry.append(v)
if not skip_row:
#print insert
#print entry
connection.execute(insert, entry)
if reader.line_num % 10000 == 0 :
print "%30s : %6dk" % (filename, reader.line_num/1000)
connection.commit()
# http://www.tutorialspoint.com/sqlite/sqlite_python.htm
connection = sqlite3.connect(db_file)
dataset = ''
if len(sys.argv)<2:
print """Usage:
python csv_to_sqlite.py {ais}
Layout:
./db/csv_to_sqlite.py # This script
./db/ports.db # The sqlite database populated
./data/AIS/YYYY-MM/*.csv # AIS data
"""
else:
dataset = sys.argv[1]
if dataset.lower() == 'ais':
ais=CSVtoSQLite(structure_ais)
ais.create_if_not_there()
#ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv")
#ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_02-May-2014.csv")
ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_03-May-2014.csv")
connection.close()
#system("""echo -e ".mode csv\n.import ../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv AIS_RAW" | sqlite3 ports.db""")
#system("""echo -e ".import ../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv AIS_RAW" | sqlite3 -csv ports.db""")
"""
## https://gist.github.com/rgrp/5199059
# ID INT PRIMARY KEY NOT NULL,
# NAME TEXT NOT NULL,
# AGE INT NOT NULL,
# ADDRESS CHAR(50),
# SALARY REAL
## http://www.tutorialspoint.com/sqlite/sqlite_indexes.htm
#CREATE INDEX index_name
#on table_name (column1, column2);
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment