Skip to content

Instantly share code, notes, and snippets.

@atomic77
Last active November 9, 2020 03:06
Show Gist options
  • Save atomic77/d522a99673519aad0650dc55be131207 to your computer and use it in GitHub Desktop.
Save atomic77/d522a99673519aad0650dc55be131207 to your computer and use it in GitHub Desktop.
Convert GHCN weather data files to pandas dataframes
import pandas as pd
import numpy as np
def transform_elements(df):
    """Rescale temperature readings in ``df`` from tenths to whole units, in place.

    Rows whose ``element`` is TMAX, TMIN or TAVG have their ``value`` divided
    by 10. All other rows keep their numeric value.

    Args:
        df: DataFrame with an ``element`` column and a numeric ``value``
            column. It is modified in place.

    Returns:
        None. When at least one temperature row is present the ``value``
        column is converted to float; otherwise the frame is left untouched.
    """
    # TMAX/TMIN/TAVG are given in _tenths_; convert to an appropriate float.
    temperature_elements = ('TMAX', 'TMIN', 'TAVG')
    is_temperature = df.element.isin(temperature_elements)
    if not is_temperature.any():
        return

    # Cast up front: writing fractional results into an integer column relies
    # on implicit upcasting, which newer pandas deprecates, and would leave
    # the dtype dependent on whether the readings happen to be multiples of 10.
    df['value'] = df['value'].astype(float)
    df.loc[is_temperature, 'value'] = df.loc[is_temperature, 'value'] / 10
def get_df_for_dly(file):
    """Parse a fixed-width GHCN ``.dly`` file into a date-indexed DataFrame.

    Each input line describes one station/element/month and carries one
    value slot (value plus three flag characters) per day of the month.
    The slots are unpivoted into one row per observation; slots holding the
    -9999 missing-value sentinel are skipped.

    Args:
        file: Path of the ``.dly`` file to read.

    Returns:
        DataFrame indexed by observation date with the columns ``country``,
        ``station``, ``element``, ``year``, ``month``, ``day``, ``value``,
        ``mflag``, ``qflag`` and ``sflag``. The frame is passed through
        ``transform_elements`` before being returned. A file with no usable
        observations yields an empty frame with the same columns.
    """
    # A record always has 31 day slots regardless of the month; days that do
    # not exist are expected to carry the missing-value sentinel.
    days_per_record = 31
    first_slot_offset = 21
    slot_width = 8  # 5-character value followed by mflag, qflag, sflag
    missing_value = -9999
    columns = [
        'country', 'station', 'element', 'year', 'month',
        'day', 'value', 'mflag', 'qflag', 'sflag',
    ]

    records = []
    with open(file, 'r') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            base = {
                'country': line[0:2],
                'station': line[0:11],
                'element': line[17:21],
                'year': int(line[11:15]),
                'month': int(line[15:17]),
            }
            for day_index in range(days_per_record):
                start = first_slot_offset + day_index * slot_width
                value = int(line[start:start + 5])
                if value == missing_value:
                    continue
                records.append({
                    **base,
                    'day': day_index + 1,
                    'value': value,
                    'mflag': line[start + 5],
                    'qflag': line[start + 6],
                    'sflag': line[start + 7],
                })

    if not records:
        # Without rows there are no year/month/day columns to build the
        # index from, so return an explicitly shaped empty frame instead.
        return pd.DataFrame(columns=columns, index=pd.DatetimeIndex([]))

    df = pd.DataFrame.from_records(records)
    df.index = pd.to_datetime(df[['year', 'month', 'day']])
    transform_elements(df)
    return df
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment