Skip to content

Instantly share code, notes, and snippets.

@jiffyclub
Last active November 15, 2020 00:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save jiffyclub/2a252333c8dcad1b99aa to your computer and use it in GitHub Desktop.
Save jiffyclub/2a252333c8dcad1b99aa to your computer and use it in GitHub Desktop.
Example of using Orca to download and concatenate multiple datasets from the California Department of Water Resources. More on Orca at https://udst.github.io/orca/.
import sys
from datetime import datetime

import orca
import pandas as pd
from caldwr.parsers.precipout import load_precipout
# This injectable is replaced when calling orca.run, but it is good to
# have a placeholder here so things don't crash when evaluating
# pieces a la carte or with orca-server.
@orca.injectable()
def iter_var():
    """Return a placeholder value for Orca's iteration variable.

    ``orca.run`` substitutes the real per-iteration value; this default
    only exists so dependent injectables can still be evaluated outside
    of a run.
    """
    return 2014
# rename the iter_var to something semantic
@orca.injectable(cache=True, cache_scope='iteration')
def year(iter_var):
    """Expose the current iteration variable under the name ``year``.

    Parameters
    ----------
    iter_var
        The ``iter_var`` injectable (the value of the current iteration).

    Returns
    -------
    The same value, unchanged.
    """
    return iter_var
@orca.injectable(cache=True, cache_scope='iteration')
def precipout_url(year):
    """Build the CDEC PRECIPOUT report URL for ``year``.

    Years from 2012 onward use the ``reports`` path; earlier years use
    the ``reports3`` path.

    Parameters
    ----------
    year
        The ``year`` injectable; compared against 2012 and substituted
        into the URL template.

    Returns
    -------
    str
        The URL with ``year`` formatted into its final component.
    """
    if year >= 2012:
        url = 'http://cdec.water.ca.gov/cgi-progs/reports/PRECIPOUT.{}'
    else:
        url = 'http://cdec.water.ca.gov/cgi-progs/reports3/PRECIPOUT.{}'
    return url.format(year)
@orca.table(cache=True, cache_scope='iteration')
def precipout(precipout_url, year):
    """Register the PRECIPOUT data for the current iteration as a table.

    Parameters
    ----------
    precipout_url
        The ``precipout_url`` injectable for the current year.
    year
        The ``year`` injectable.

    Returns
    -------
    Whatever ``load_precipout`` returns for this URL and year
    (presumably a DataFrame with ``month`` and ``year`` columns, since
    downstream tables query and select those -- confirm against
    ``caldwr.parsers.precipout``).
    """
    return load_precipout(precipout_url, year)
@orca.table(cache=True, cache_scope='iteration')
def precip_monthly(precipout):
    """Return the ``precipout`` rows that are not summary rows.

    Rows whose ``month`` value is ``"Oct-Sep"`` or ``"Wat-Yr"`` are
    dropped (presumably these are yearly aggregates rather than real
    months -- confirm against the report format).

    Parameters
    ----------
    precipout
        The ``precipout`` table wrapper; converted with ``to_frame()``.
    """
    return precipout.to_frame().query('month not in ["Oct-Sep", "Wat-Yr"]')
@orca.table(cache=True, cache_scope='iteration')
def precip_yearly(precipout):
    """Return only the ``precipout`` rows whose ``month`` is ``"Oct-Sep"``.

    Parameters
    ----------
    precipout
        The ``precipout`` table wrapper; converted with ``to_frame()``.
    """
    return precipout.to_frame().query('month == "Oct-Sep"')
@orca.column('precip_monthly', cache=True, cache_scope='iteration')
def date(precip_monthly):
    """Compute a ``date`` column for the ``precip_monthly`` table.

    Each row's ``month`` and ``year`` values are joined with a space and
    parsed with the ``'%b %Y'`` format (abbreviated month name plus
    four-digit year).

    Parameters
    ----------
    precip_monthly
        The ``precip_monthly`` table wrapper; only its ``year`` and
        ``month`` columns are materialized.

    Returns
    -------
    The result of applying the parser row-wise (``axis=1``).
    """
    def to_datetime(row):
        # Build e.g. "Jan 2014" and parse it; strptime defaults the day to 1.
        return datetime.strptime(
            '{} {}'.format(row['month'], row['year']), '%b %Y')

    df = precip_monthly.to_frame(columns=['year', 'month'])
    return df.apply(to_datetime, axis=1)
@orca.step()
def concat_monthly(precip_monthly):
    """Accumulate this iteration's monthly rows into ``monthly_df``.

    On the first iteration the ``monthly_df`` injectable does not exist
    yet, so the current frame is stored as-is. On later iterations the
    current frame is appended to the stored one and the result replaces
    the injectable.

    Parameters
    ----------
    precip_monthly
        The ``precip_monthly`` table wrapper for the current iteration.
    """
    precip_monthly = precip_monthly.to_frame()

    if orca.is_injectable('monthly_df'):
        accumulated = orca.get_injectable('monthly_df')
        # DataFrame.append was removed in pandas 2.0; pd.concat with
        # ignore_index=True is the supported equivalent.
        precip_monthly = pd.concat(
            [accumulated, precip_monthly], ignore_index=True)

    orca.add_injectable('monthly_df', precip_monthly)
@orca.step()
def concat_yearly(precip_yearly):
    """Accumulate this iteration's yearly rows into ``yearly_df``.

    On the first iteration the ``yearly_df`` injectable does not exist
    yet, so the current frame is stored as-is. On later iterations the
    current frame is appended to the stored one and the result replaces
    the injectable.

    Parameters
    ----------
    precip_yearly
        The ``precip_yearly`` table wrapper for the current iteration.
    """
    precip_yearly = precip_yearly.to_frame()

    if orca.is_injectable('yearly_df'):
        accumulated = orca.get_injectable('yearly_df')
        # DataFrame.append was removed in pandas 2.0; pd.concat with
        # ignore_index=True is the supported equivalent.
        precip_yearly = pd.concat(
            [accumulated, precip_yearly], ignore_index=True)

    orca.add_injectable('yearly_df', precip_yearly)
def main():
    """Run both concat steps for 1987-2014 and write the results to CSV.

    ``range(1987, 2015)`` is passed as ``iter_vars``, so the steps run
    once per year from 1987 through 2014 inclusive. The accumulated
    ``monthly_df`` and ``yearly_df`` injectables are then written to
    ``precip_monthly.csv`` and ``precip_yearly.csv`` in the current
    working directory, without the index column.
    """
    orca.run(['concat_monthly', 'concat_yearly'], iter_vars=range(1987, 2015))

    monthly_df = orca.get_injectable('monthly_df')
    monthly_df.to_csv('precip_monthly.csv', index=False)

    yearly_df = orca.get_injectable('yearly_df')
    yearly_df.to_csv('precip_yearly.csv', index=False)
if __name__ == '__main__':
    # main() has no return statement, so a successful run exits with
    # status 0.
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment