Skip to content

Instantly share code, notes, and snippets.

@mattharrison
Last active June 12, 2020 17:14
Show Gist options
  • Save mattharrison/83f520a231eefbc6694e5946e967c27f to your computer and use it in GitHub Desktop.
Save mattharrison/83f520a231eefbc6694e5946e967c27f to your computer and use it in GitHub Desktop.
Tweak NYC (non-mutating)
def tweak_nyc(raw):
    """Return a cleaned copy of the NYC weather frame without mutating ``raw``.

    The work is done in a single method chain:

    * column labels are stripped of surrounding whitespace and inner spaces
      become underscores (``' Events'`` -> ``'Events'``);
    * every ``'T'`` entry in ``PrecipitationIn`` is replaced by ``'0.001'``
      and the column is converted with ``pd.to_numeric``;
    * missing ``Events`` values become empty strings;
    * ``PrecipitationCM`` is added as ``PrecipitationIn * 2.54``.

    Parameters
    ----------
    raw : pandas.DataFrame
        Must provide columns whose cleaned labels are ``PrecipitationIn``
        and ``Events``. All column labels must be strings.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``raw`` itself is left untouched.
    """
    def clean_col(val):
        # Normalise one column label: trim both ends, then underscore spaces.
        return val.strip().replace(' ', '_')

    return (raw
        .rename(columns=clean_col)
        # The lambdas read the renamed, in-flight frame instead of closing
        # over ``raw``, so each step depends only on the one before it.
        .assign(
            # presumably 'T' marks a trace amount -- confirm with the data source
            PrecipitationIn=lambda df_: pd.to_numeric(
                df_.PrecipitationIn.replace('T', '0.001')),
            Events=lambda df_: df_.Events.fillna(''),
        )
        .assign(PrecipitationCM=lambda df_: df_.PrecipitationIn * 2.54)
    )
# Clean the NYC weather frame (``nyc`` is expected to be defined earlier in the file).
nyc2 = tweak_nyc(nyc)

# Column labels handed to ``read_csv`` for the TAO file, in file order.
names = [
    'obs', 'year', 'month', 'day', 'date', 'latitude', 'longitude',
    'zon.winds', 'mer.winds', 'humidity', 'air temp.', 's.s.temp.',
]

# NOTE(review): the nested-list form of ``parse_dates`` (combine columns 1-3,
# i.e. year/month/day, into a single parsed column) is deprecated in recent
# pandas releases -- confirm the pinned pandas version before upgrading.
nino = pd.read_csv(
    'data/tao-all2.dat.gz',
    sep=' ',
    names=names,
    na_values='.',
    parse_dates=[[1, 2, 3]],
)
def clean_nino_col(col):
    """Turn a raw nino column label into an identifier-style name.

    Trailing dots are dropped first, then every remaining dot or space
    becomes an underscore, e.g. ``'air temp.'`` -> ``'air_temp'`` and
    ``'s.s.temp.'`` -> ``'s_s_temp'``.

    Parameters
    ----------
    col : str
        Original column label.

    Returns
    -------
    str
        The cleaned label.
    """
    return col.rstrip('.').replace('.', '_').replace(' ', '_')
def tweak_nino(raw):
    """Return a cleaned copy of the nino frame without mutating ``raw``.

    The work is done in a single method chain:

    * column labels are normalised with ``clean_nino_col``;
    * ``air_temp_F`` is added as ``air_temp * 9 / 5 + 32``;
    * ``zon_winds_mph`` and ``mer_winds_mph`` are added as the respective
      wind columns multiplied by 2.237;
    * ``date`` is re-parsed with ``pd.to_datetime`` using format ``%y%m%d``;
    * the ``obs`` column is dropped.

    Parameters
    ----------
    raw : pandas.DataFrame
        Must provide columns whose cleaned labels are ``air_temp``,
        ``zon_winds``, ``mer_winds``, ``date`` and ``obs``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``raw`` itself is left untouched.
    """
    return (raw
        .rename(columns=clean_nino_col)
        # Every derived column is a lambda over the renamed, in-flight frame,
        # so no step reaches back to ``raw``.
        .assign(
            # presumably Celsius -> Fahrenheit; confirm the source units
            air_temp_F=lambda df_: df_.air_temp * 9 / 5 + 32,
            # 2.237 is the m/s -> mph factor; assumes winds are in m/s -- TODO confirm
            zon_winds_mph=lambda df_: df_.zon_winds * 2.237,
            mer_winds_mph=lambda df_: df_.mer_winds * 2.237,
            date=lambda df_: pd.to_datetime(df_.date, format='%y%m%d'),
        )
        .drop(columns='obs')
    )
# Swap the freshly loaded frame for its cleaned version.
nino = nino.pipe(tweak_nino)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment