Skip to content

Instantly share code, notes, and snippets.

@fscottfoti
Created March 23, 2016 21:32
Show Gist options
  • Save fscottfoti/a066a5ee6984a6c21573 to your computer and use it in GitHub Desktop.
Save fscottfoti/a066a5ee6984a6c21573 to your computer and use it in GitHub Desktop.
Script to turn parcels into a binary h5 file
from pymongo import MongoClient
from bson.objectid import ObjectId
import json
import time
from string import join
import pandas as pd
import cPickle
# --- Run configuration ------------------------------------------------------
# MONGO: truthy -> features are inserted into MongoDB; falsy -> written to
#        parcels.json.
# JURIS: when set, only records whose "juris" value equals it are exported.
# FEASIBILITY: selects the feasibility dataset instead of the parcels dataset.
MONGO = True
JURIS = None
FEASIBILITY = True

# The target collection id and the attribute CSV both depend on the dataset.
if FEASIBILITY:
    cid, csvname = "hMm5FqbDCPa4ube6Y", "output/feasibility.csv"  # feasibility
else:
    cid, csvname = "ZC7yyAyA8jkDFnRtf", "output/parcels.csv"  # parcels

# Open whichever sink export_features() is going to use.
if not MONGO:
    outf = open("parcels.json", "w")
else:
    client = MongoClient()
    #client.drop_database("baus")
    db = client.togethermap

# Attribute table, indexed by geom_id so records can be looked up per geometry.
df = pd.read_csv(csvname, index_col="geom_id")

features = []  # batch of features waiting to be exported
cnt = 0        # number of pickled records read so far
print(time.ctime())
def export_features(features):
    """Send one batch of feature dicts to the configured sink.

    When the module-level ``MONGO`` flag is truthy the batch is bulk-inserted
    into ``db.places``; otherwise every feature is written to ``outf`` as one
    JSON document per line.

    Args:
        features: list of JSON-serializable feature dicts. An empty list is
            a no-op.
    """
    if not features:
        # Nothing to export.
        return
    if MONGO:
        db.places.insert_many(features)
    else:
        # Terminate every record with a newline. Joining with "\n" alone left
        # no separator between the last record of one batch and the first
        # record of the next, gluing them together on a single line.
        outf.writelines(json.dumps(feature) + "\n" for feature in features)
# Load the (geom_id, geojson-string) pairs, attach the CSV attributes to each
# geometry, and export the resulting features in batches of 10,000.
#
# NOTE(review): unpickling can execute arbitrary code; only load a pickle
# produced by a trusted run of the parcel-export script.
# The file is opened in the default (text) mode on purpose: a protocol-0
# pickle must be read in the same mode it was written in.
with open("output/parcels.pickle") as pickle_file:
    parcels = cPickle.load(pickle_file)

for geom_id, geojson in parcels:
    cnt += 1
    if cnt % 10000 == 0:
        print("Done reading rec %d" % cnt)

    # Flush a full batch before handling the current record.
    if len(features) == 10000:
        print("Exporting 10k recs")
        export_features(features)
        print("Done exporting 10k recs")
        features = []

    try:
        rec = df.loc[geom_id]
    except KeyError:
        # don't need to keep it, it's not in parcels.csv
        continue

    if JURIS and rec["juris"] != JURIS:
        continue

    f = json.loads(geojson)
    f["properties"] = rec.to_dict()
    f["properties"]["geom_id"] = geom_id
    # Drop the source feature id (if any); '_id' below becomes the identifier.
    f.pop("id", None)

    # Hard-coded creator and timestamp metadata stamped on every feature.
    f["creatorUID"] = "ceTir2NKMN87Gq7wj"
    f["creator"] = "Fletcher Foti"
    f["createDate"] = "2015-08-29T05:10:00.446Z"
    f["updateDate"] = "2015-08-29T05:10:00.446Z"
    f["collectionId"] = cid
    f['_id'] = str(ObjectId())
    f["post_count"] = 0
    features.append(f)

# Export whatever is left over from the last, partially filled batch.
if features:
    export_features(features)

# In file mode, close the output so everything written is flushed to disk.
if not MONGO:
    outf.close()

print(time.ctime())
import fiona
from shapely.geometry import shape
import pandas as pd
import json
import time
from string import join
import cPickle
# Print the current time before the conversion starts.
print(time.ctime())
def add_bbox(p):
    """Attach a rectangular bounding-box polygon to feature ``p``.

    The feature's 'geometry' member is converted with ``shape`` and its
    ``bounds`` (minx, miny, maxx, maxy) are turned into a closed five-point
    ring, stored as a GeoJSON-style Polygon under ``p['bbox']``.

    The feature is modified in place and also returned.
    """
    x_lo, y_lo, x_hi, y_hi = shape(p['geometry']).bounds

    # Walk the four corners and repeat the first one to close the ring.
    ring = [
        [x_lo, y_lo],
        [x_lo, y_hi],
        [x_hi, y_hi],
        [x_hi, y_lo],
        [x_lo, y_lo],
    ]
    p['bbox'] = {"type": "Polygon", "coordinates": [ring]}
    return p
# Read every parcel from the shapefile, keep only its geometry plus a bounding
# box, and pickle the result as a list of (geom_id, geojson-string) pairs.
#
# NOTE(review): the HDF5 store is still created (mode "w"), but the code that
# wrote into it is commented out below; it is now closed explicitly at the end
# instead of being left open.
store = pd.HDFStore("parcels.h5", "w")
features = []
with fiona.drivers():
    with fiona.open('/home/ubuntu/data/parcels4326.shp') as shp:
        for f in shp:
            # Records without a geometry cannot get a bounding box; skip them.
            if f["geometry"] is None:
                continue
            geom_id = int(f["properties"]["GEOM_ID"])
            # Drop all attributes: only the geometry and bbox are serialized.
            f["properties"] = {}
            f = add_bbox(f)
            features.append((geom_id, json.dumps(f)))
            # Progress report every 5000 kept features.
            if len(features) % 5000 == 0:
                print(len(features))

# Use a context manager so the pickle is flushed and closed deterministically.
# Text mode is kept on purpose: a protocol-0 pickle must be written and read
# in the same mode.
with open("parcels.pickle", "w") as pickle_file:
    cPickle.dump(features, pickle_file)

#geomids, geojson = zip(*features)
#s = pd.Series(geojson, index=geomids)
#store["parcels"] = pd.DataFrame({"geojson": s})
store.close()
print(time.ctime())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment