Skip to content

Instantly share code, notes, and snippets.

@plablo09
Last active February 23, 2016 23:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save plablo09/2faa0fd94a9526ae19ff to your computer and use it in GitHub Desktop.
Save plablo09/2faa0fd94a9526ae19ff to your computer and use it in GitHub Desktop.
Parse and import a csv with points to postgres/poststgis
# -*- coding: utf-8 -*-
import psycopg2 as psy
from datetime import datetime
import time
import csv
import json
def date_formatter(date_string):
"""Returns datetime object"""
dt = datetime.fromtimestamp(time.mktime(
time.strptime(str(date_string),
'%a %b %d %H:%M:%S %Z %Y')))
return dt
def get_clase(c_str):
"""Return the int representation of clase.
Not really important, just for campatibility with the mapbox data.
"""
if c_str == 'POS':
return 3
elif c_str == 'NEG':
return 1
else:
return 2
#connect to database
conn_str = "host=localhost dbname=map_month user=postgres password=postgres"
conn = psy.connect(conn_str)
#read and parse csv file
csv_data = csv.reader(open('10Mcorected.csv', 'rb'), delimiter='\t')
cur = conn.cursor()
for i, row in enumerate(csv_data):
t_id = row[0]
user = row[1]
x = row[5]
y = row[6]
coordinates = (y,x)
print coordinates
try:
date_obj = date_formatter(row[7])
except:
date_0bj = date_formatter('Sat Jan 01 00:00:00 CST 2000')
contenido = row[8]
clase = get_clase(row[9])
try:
cur.execute("SELECT ST_SetSRID(ST_MakePoint(%s, %s),4326);",coordinates)
point_wkb = cur.fetchall()
except:
conn.rollback()
point_wkb = None
if point_wkb is not None:
insert_row = (t_id,user,contenido,date_obj,point_wkb[0],clase)
print insert_row
query = """INSERT INTO points(t_id,uname,contenido,fecha_hora,\
geom,clase) VALUES (%s,%s,%s,%s,%s,%s);"""
cur.execute(query,insert_row)
conn.commit()
#print insert_row
print 'Done'
#return 'done'
# id serial NOT NULL,
# t_id int,
# uname text,
# contenido text,
# fecha_hora timestamp with time zone,
# geom geometry(Point,4326),
# clase int
# try:
# conn = psy.connect(conn_str)
# except psy.Error, e:
# print 'noooooo'
# csv_data = csv.reader(open('MonthMap.csv', 'rb'), delimiter='\t')
# features = []
# for i, row in enumerate(csv_data):
#
# try:
# f = {
# "type" : "Feature",
# "id" : row[0],
# "properties" : { "date" : row[3], "value" : row[4]},
# "geometry" : {
# "type" : "Point",
# "coordinates" : [float(row[2]),float(row[1])]
# }
# }
# features.append(f)
# except:
# pass
#
# feature_collection = {
# "type" : "FeatureCollection",
# "features": features
# }
# with open('output.geojson', 'w') as outfile:
# json.dump(feature_collection, outfile,indent=4, sort_keys=True)
CREATE TABLE points
(
id serial NOT NULL,
t_id text,
uname text,
contenido text,
fecha_hora timestamp with time zone,
geom geometry(Point,4326),
clase integer,
CONSTRAINT tweets_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
ALTER TABLE points
OWNER TO postgres;
-- Index: points_date_idx
-- DROP INDEX points_date_idx;
CREATE INDEX points_date_idx
ON points
USING btree
(fecha_hora);
-- Index: points_gix
-- DROP INDEX points_gix;
CREATE INDEX points_gix
ON points
USING gist
(geom);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment