Created
April 14, 2015 22:16
-
-
Save willycs40/18eeb34012d8be3b95a3 to your computer and use it in GitHub Desktop.
Link Weather Data to Geospatial / Temporal Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sqlalchemy import create_engine # database connection | |
import numpy as np | |
import pyproj | |
from scipy import spatial | |
# connect to the database, and query for all the unique weather station names and locations: | |
disk_engine = create_engine('sqlite:///WeatherData.db') | |
query = ''' | |
SELECT SiteName, Latitude, Longitude | |
FROM Data | |
GROUP BY SiteName, Latitude, Longitude | |
''' | |
sites_df = pd.read_sql_query(query, disk_engine) | |
# pull out the station names, latitude and longitudes out as seperate lists to feed to pyproj | |
stations = list(sites_df['SiteName']) | |
lon = list(sites_df['Longitude']) | |
lat = list(sites_df['Latitude']) | |
# convert the latitude and longitudes (provided in wgs 84) to os 1936 (eastings and northings) | |
wgs84=pyproj.Proj("+init=EPSG:4326") # Lat Lon with WGS84 datum used by GPS units and Google Earth | |
osgb36=pyproj.Proj("+init=EPSG:27700") # UK Ordnance Survey, 1936 datum | |
easting, northing = pyproj.transform(wgs84, osgb36, lon, lat) | |
# zip up the eastings and northings and create an numpy array, to feed to the kd-tree | |
easting_northing = np.array(zip(easting, northing)) | |
# create a kd-tree from the easting_northing locations of the weather stations. | |
# this will allow fast lookup of the nearest weather station based on an easting northing coordinate provided | |
tree = spatial.KDTree(easting_northing) | |
# now I can lookup the nearest weather station to a particular coordinate as follows: | |
accident_x, accident_y = pyproj.transform(wgs84, osgb36, -0.602322, 53.18559) #convert wgs84 to | |
distance, station = tree.query([x,y]) | |
print(stations[station] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sqlalchemy import create_engine # database connection | |
import datetime as dt | |
csv_path = r"C:\Users\will.cubitt-smith\Local\Projects\WeatherData\combined.csv" | |
disk_engine = create_engine('sqlite:///WeatherData.db') | |
start = dt.datetime.now() | |
chunksize = 20000 | |
j = 0 | |
index_start = 1 | |
for df in pd.read_csv(csv_path, chunksize=chunksize, iterator=True, encoding='utf-8'): | |
df = df.rename(columns={c: c.replace(' ', '') for c in df.columns}) # Remove spaces from columns | |
df['ObservationDate'] = pd.to_datetime(df['ObservationDate']) # Convert to datetimes | |
df.index += index_start | |
j+=1 | |
print '{} seconds: completed {} rows'.format((dt.datetime.now() - start).seconds, j*chunksize) | |
df.to_sql('data', disk_engine, if_exists='append') | |
index_start = df.index[-1] + 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment