Last active
February 23, 2016 23:05
-
-
Save plablo09/2faa0fd94a9526ae19ff to your computer and use it in GitHub Desktop.
Parse and import a csv with points to postgres/poststgis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import psycopg2 as psy | |
from datetime import datetime | |
import time | |
import csv | |
import json | |
def date_formatter(date_string): | |
"""Returns datetime object""" | |
dt = datetime.fromtimestamp(time.mktime( | |
time.strptime(str(date_string), | |
'%a %b %d %H:%M:%S %Z %Y'))) | |
return dt | |
def get_clase(c_str): | |
"""Return the int representation of clase. | |
Not really important, just for campatibility with the mapbox data. | |
""" | |
if c_str == 'POS': | |
return 3 | |
elif c_str == 'NEG': | |
return 1 | |
else: | |
return 2 | |
#connect to database | |
conn_str = "host=localhost dbname=map_month user=postgres password=postgres" | |
conn = psy.connect(conn_str) | |
#read and parse csv file | |
csv_data = csv.reader(open('10Mcorected.csv', 'rb'), delimiter='\t') | |
cur = conn.cursor() | |
for i, row in enumerate(csv_data): | |
t_id = row[0] | |
user = row[1] | |
x = row[5] | |
y = row[6] | |
coordinates = (y,x) | |
print coordinates | |
try: | |
date_obj = date_formatter(row[7]) | |
except: | |
date_0bj = date_formatter('Sat Jan 01 00:00:00 CST 2000') | |
contenido = row[8] | |
clase = get_clase(row[9]) | |
try: | |
cur.execute("SELECT ST_SetSRID(ST_MakePoint(%s, %s),4326);",coordinates) | |
point_wkb = cur.fetchall() | |
except: | |
conn.rollback() | |
point_wkb = None | |
if point_wkb is not None: | |
insert_row = (t_id,user,contenido,date_obj,point_wkb[0],clase) | |
print insert_row | |
query = """INSERT INTO points(t_id,uname,contenido,fecha_hora,\ | |
geom,clase) VALUES (%s,%s,%s,%s,%s,%s);""" | |
cur.execute(query,insert_row) | |
conn.commit() | |
#print insert_row | |
print 'Done' | |
#return 'done' | |
# id serial NOT NULL, | |
# t_id int, | |
# uname text, | |
# contenido text, | |
# fecha_hora timestamp with time zone, | |
# geom geometry(Point,4326), | |
# clase int | |
# try: | |
# conn = psy.connect(conn_str) | |
# except psy.Error, e: | |
# print 'noooooo' | |
# csv_data = csv.reader(open('MonthMap.csv', 'rb'), delimiter='\t') | |
# features = [] | |
# for i, row in enumerate(csv_data): | |
# | |
# try: | |
# f = { | |
# "type" : "Feature", | |
# "id" : row[0], | |
# "properties" : { "date" : row[3], "value" : row[4]}, | |
# "geometry" : { | |
# "type" : "Point", | |
# "coordinates" : [float(row[2]),float(row[1])] | |
# } | |
# } | |
# features.append(f) | |
# except: | |
# pass | |
# | |
# feature_collection = { | |
# "type" : "FeatureCollection", | |
# "features": features | |
# } | |
# with open('output.geojson', 'w') as outfile: | |
# json.dump(feature_collection, outfile,indent=4, sort_keys=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE points | |
( | |
id serial NOT NULL, | |
t_id text, | |
uname text, | |
contenido text, | |
fecha_hora timestamp with time zone, | |
geom geometry(Point,4326), | |
clase integer, | |
CONSTRAINT tweets_pkey PRIMARY KEY (id) | |
) | |
WITH ( | |
OIDS=FALSE | |
); | |
ALTER TABLE points | |
OWNER TO postgres; | |
-- Index: points_date_idx | |
-- DROP INDEX points_date_idx; | |
CREATE INDEX points_date_idx | |
ON points | |
USING btree | |
(fecha_hora); | |
-- Index: points_gix | |
-- DROP INDEX points_gix; | |
CREATE INDEX points_gix | |
ON points | |
USING gist | |
(geom); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment