Skip to content

Instantly share code, notes, and snippets.

@willycs40
Created April 14, 2015 22:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willycs40/18eeb34012d8be3b95a3 to your computer and use it in GitHub Desktop.
Save willycs40/18eeb34012d8be3b95a3 to your computer and use it in GitHub Desktop.
Link Weather Data to Geospatial / Temporal Data
import pandas as pd
from sqlalchemy import create_engine # database connection
import numpy as np
import pyproj
from scipy import spatial
# connect to the database, and query for all the unique weather station names and locations:
disk_engine = create_engine('sqlite:///WeatherData.db')
query = '''
SELECT SiteName, Latitude, Longitude
FROM Data
GROUP BY SiteName, Latitude, Longitude
'''
sites_df = pd.read_sql_query(query, disk_engine)
# pull out the station names, latitude and longitudes out as seperate lists to feed to pyproj
stations = list(sites_df['SiteName'])
lon = list(sites_df['Longitude'])
lat = list(sites_df['Latitude'])
# convert the latitude and longitudes (provided in wgs 84) to os 1936 (eastings and northings)
wgs84=pyproj.Proj("+init=EPSG:4326") # Lat Lon with WGS84 datum used by GPS units and Google Earth
osgb36=pyproj.Proj("+init=EPSG:27700") # UK Ordnance Survey, 1936 datum
easting, northing = pyproj.transform(wgs84, osgb36, lon, lat)
# zip up the eastings and northings and create an numpy array, to feed to the kd-tree
easting_northing = np.array(zip(easting, northing))
# create a kd-tree from the easting_northing locations of the weather stations.
# this will allow fast lookup of the nearest weather station based on an easting northing coordinate provided
tree = spatial.KDTree(easting_northing)
# now I can lookup the nearest weather station to a particular coordinate as follows:
accident_x, accident_y = pyproj.transform(wgs84, osgb36, -0.602322, 53.18559) #convert wgs84 to
distance, station = tree.query([x,y])
print(stations[station]
import pandas as pd
from sqlalchemy import create_engine # database connection
import datetime as dt
csv_path = r"C:\Users\will.cubitt-smith\Local\Projects\WeatherData\combined.csv"
disk_engine = create_engine('sqlite:///WeatherData.db')
start = dt.datetime.now()
chunksize = 20000
j = 0
index_start = 1
for df in pd.read_csv(csv_path, chunksize=chunksize, iterator=True, encoding='utf-8'):
df = df.rename(columns={c: c.replace(' ', '') for c in df.columns}) # Remove spaces from columns
df['ObservationDate'] = pd.to_datetime(df['ObservationDate']) # Convert to datetimes
df.index += index_start
j+=1
print '{} seconds: completed {} rows'.format((dt.datetime.now() - start).seconds, j*chunksize)
df.to_sql('data', disk_engine, if_exists='append')
index_start = df.index[-1] + 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment