Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Example of using Orca to download and concatenate multiple datasets from the California Department of Water Resources. More on Orca at https://udst.github.io/orca/.
import sys
from datetime import datetime

import orca
import pandas as pd

from caldwr.parsers.precipout import load_precipout
# This will be replaced when calling orca.run, but it's good to
# have a placeholder here so things don't crash when evaluating
# things a la carte or with orca-server.
@orca.injectable()
def iter_var():
    """Return a placeholder iteration value (2014).

    Stands in for the value supplied through ``orca.run(..., iter_vars=...)``
    so that dependants can still be evaluated outside of a run.
    """
    placeholder_year = 2014
    return placeholder_year
# rename the iter_var to something semantic
@orca.injectable(cache=True, cache_scope='iteration')
def year(iter_var):
    """Expose the current ``iter_var`` under the more descriptive name ``year``."""
    current_year = iter_var
    return current_year
@orca.injectable(cache=True, cache_scope='iteration')
def precipout_url(year):
    """Build the PRECIPOUT report URL for ``year``.

    Years 2012 and later are served from the ``reports`` directory; earlier
    years are served from ``reports3``. The year is substituted into the
    trailing ``{}`` of the chosen template.
    """
    template = (
        'http://cdec.water.ca.gov/cgi-progs/reports/PRECIPOUT.{}'
        if year >= 2012
        else 'http://cdec.water.ca.gov/cgi-progs/reports3/PRECIPOUT.{}'
    )
    return template.format(year)
@orca.table(cache=True, cache_scope='iteration')
def precipout(precipout_url, year):
    """Load the PRECIPOUT report for the current iteration.

    Delegates to ``load_precipout`` with the report URL and the year; the
    result is registered as the ``precipout`` table.
    """
    report = load_precipout(precipout_url, year)
    return report
@orca.table(cache=True, cache_scope='iteration')
def precip_monthly(precipout):
    """Return the ``precipout`` rows that belong to individual months.

    Rows whose ``month`` value is ``"Oct-Sep"`` or ``"Wat-Yr"`` are dropped
    (presumably these are period totals rather than months -- confirm against
    the parser).
    """
    full_report = precipout.to_frame()
    monthly_rows = full_report.query('month not in ["Oct-Sep", "Wat-Yr"]')
    return monthly_rows
@orca.table(cache=True, cache_scope='iteration')
def precip_yearly(precipout):
    """Return only the ``precipout`` rows whose ``month`` is ``"Oct-Sep"``."""
    full_report = precipout.to_frame()
    yearly_rows = full_report.query('month == "Oct-Sep"')
    return yearly_rows
@orca.column('precip_monthly', cache=True, cache_scope='iteration')
def date(precip_monthly):
    """Compute a ``date`` column for ``precip_monthly``.

    Each row's ``month`` and ``year`` values are joined with a space and
    parsed with the ``'%b %Y'`` format (abbreviated month name followed by a
    four-digit year).
    """
    month_year = precip_monthly.to_frame(columns=['year', 'month'])

    def _parse_row(record):
        # Build e.g. "Jan 2014" and let strptime interpret it.
        label = '{} {}'.format(record['month'], record['year'])
        return datetime.strptime(label, '%b %Y')

    return month_year.apply(_parse_row, axis=1)
@orca.step()
def concat_monthly(precip_monthly):
    """Accumulate the current iteration's monthly rows in ``monthly_df``.

    On the first run the table's frame is registered as the ``monthly_df``
    injectable. On later runs the frame is concatenated onto the existing
    ``monthly_df`` (with a fresh 0..n-1 index) and the injectable is
    replaced by the combined frame.

    Parameters
    ----------
    precip_monthly
        The ``precip_monthly`` table; converted with ``to_frame()``.
    """
    combined = precip_monthly.to_frame()
    if orca.is_injectable('monthly_df'):
        accumulated = orca.get_injectable('monthly_df')
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat with ignore_index=True is the supported equivalent.
        combined = pd.concat([accumulated, combined], ignore_index=True)
    orca.add_injectable('monthly_df', combined)
@orca.step()
def concat_yearly(precip_yearly):
    """Accumulate the current iteration's yearly rows in ``yearly_df``.

    On the first run the table's frame is registered as the ``yearly_df``
    injectable. On later runs the frame is concatenated onto the existing
    ``yearly_df`` (with a fresh 0..n-1 index) and the injectable is
    replaced by the combined frame.

    Parameters
    ----------
    precip_yearly
        The ``precip_yearly`` table; converted with ``to_frame()``.
    """
    combined = precip_yearly.to_frame()
    if orca.is_injectable('yearly_df'):
        accumulated = orca.get_injectable('yearly_df')
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat with ignore_index=True is the supported equivalent.
        combined = pd.concat([accumulated, combined], ignore_index=True)
    orca.add_injectable('yearly_df', combined)
def main():
    """Run the concat steps for 1987 through 2014 and export the results.

    After the run, the ``monthly_df`` and ``yearly_df`` injectables are
    written to ``precip_monthly.csv`` and ``precip_yearly.csv`` respectively,
    without the index column.
    """
    orca.run(['concat_monthly', 'concat_yearly'], iter_vars=range(1987, 2015))

    # (injectable name, output file) pairs, written in this order.
    exports = (
        ('monthly_df', 'precip_monthly.csv'),
        ('yearly_df', 'precip_yearly.csv'),
    )
    for injectable_name, csv_path in exports:
        frame = orca.get_injectable(injectable_name)
        frame.to_csv(csv_path, index=False)
if __name__ == '__main__':
    # Script entry point: run the pipeline and hand its return value to
    # sys.exit as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment