Skip to content

Instantly share code, notes, and snippets.

@danhammer
Created May 2, 2014 23:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danhammer/c097bec81f3bfb55df89 to your computer and use it in GitHub Desktop.
Save danhammer/c097bec81f3bfb55df89 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import json
import pandas as pd
# Process information on Caltrans projects. Before running this script,
# download the KML feed and convert it to GeoJSON (cons.json):
# wget https://dot.ca.gov/hq/construc/cons.kml
# ogr2ogr -f GeoJSON cons.json cons.kml
def _read_data():
with open('cons.json') as f:
x = json.load(f)
return x['features']
def _process_entry(entry):
    """Flatten one GeoJSON feature into a single dict of its attributes.

    ``entry['properties']['Description']`` is split on ``<br />``. Each
    fragment has its markup stripped and, when the remaining text has the
    form ``key: value``, contributes one item to the result. The first two
    coordinates are then stored under ``lon`` and ``lat``, overriding any
    description fields that happen to use those names.

    Args:
        entry: a feature mapping with ``geometry.coordinates`` (at least
            two ordinates, unpacked as lon then lat) and
            ``properties.Description`` (a string).

    Returns:
        dict of the parsed description fields plus ``lat`` and ``lon``.

    Raises:
        KeyError: if the expected keys are missing from ``entry``.
        ValueError: if fewer than two coordinates are present.
    """
    # Take only the first two ordinates so 2-D points (no altitude) work too.
    lon, lat = entry['geometry']['coordinates'][:2]
    description = entry['properties']['Description']

    def _process_kv(kv):
        """Return ``{key: value}`` for a ``key: value`` fragment, else None."""
        # Name the parser explicitly so the result does not depend on which
        # optional parsers are installed (and to avoid bs4's warning).
        text = BeautifulSoup(kv, 'html.parser').text
        # Split on the first separator only: a value that itself contains
        # ": " must not cause the whole field to be discarded.
        key, sep, value = text.partition(': ')
        return {key: value} if sep else None

    record = {}
    for fragment in description.split('<br />'):
        pair = _process_kv(fragment)  # parse each fragment exactly once
        if pair is not None:
            record.update(pair)
    record['lat'] = lat
    record['lon'] = lon
    return record
# Flatten every feature into a row and write the table to tester.csv.
# Build a concrete list rather than a lazy ``map`` object: on Python 3 some
# pandas releases refuse an iterator as DataFrame input.
xx = [_process_entry(feature) for feature in _read_data()]
df = pd.DataFrame(xx)
df.to_csv('tester.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment