Last active
November 15, 2020 00:37
-
-
Save jiffyclub/2a252333c8dcad1b99aa to your computer and use it in GitHub Desktop.
Example of using Orca to download and concatenate multiple datasets from the California Department of Water Resources. More on Orca at https://udst.github.io/orca/.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
from datetime import datetime

import orca
import pandas as pd
from caldwr.parsers.precipout import load_precipout
@orca.injectable()
def iter_var():
    """Return a placeholder iteration variable (2014).

    ``orca.run`` replaces this injectable on every iteration; the
    placeholder only exists so that things don't crash when evaluating
    pieces a la carte or with orca-server.
    """
    placeholder_year = 2014
    return placeholder_year
@orca.injectable(cache=True, cache_scope='iteration')
def year(iter_var):
    """Expose ``iter_var`` under the more semantic name ``year``.

    The value is passed through unchanged.
    """
    current_year = iter_var
    return current_year
@orca.injectable(cache=True, cache_scope='iteration')
def precipout_url(year):
    """Return the CDEC PRECIPOUT report URL for ``year``.

    Reports for 2012 and later live under ``reports/``; earlier years
    live under ``reports3/``. The year is appended as the file suffix.
    """
    template = (
        'http://cdec.water.ca.gov/cgi-progs/reports/PRECIPOUT.{}'
        if year >= 2012
        else 'http://cdec.water.ca.gov/cgi-progs/reports3/PRECIPOUT.{}'
    )
    return template.format(year)
@orca.table(cache=True, cache_scope='iteration')
def precipout(precipout_url, year):
    """Load the PRECIPOUT report for ``year`` from ``precipout_url``.

    Parsing is delegated entirely to ``load_precipout``; its result is
    registered as the ``precipout`` table.
    """
    report = load_precipout(precipout_url, year)
    return report
@orca.table(cache=True, cache_scope='iteration')
def precip_monthly(precipout):
    """Return the rows of ``precipout`` that are individual months.

    Rows whose ``month`` value is "Oct-Sep" or "Wat-Yr" are dropped
    (presumably these are yearly summary rows -- confirm against the
    parser output).
    """
    all_rows = precipout.to_frame()
    return all_rows.query('month not in ["Oct-Sep", "Wat-Yr"]')
@orca.table(cache=True, cache_scope='iteration')
def precip_yearly(precipout):
    """Return only the rows of ``precipout`` whose ``month`` is "Oct-Sep"."""
    all_rows = precipout.to_frame()
    return all_rows.query('month == "Oct-Sep"')
@orca.column('precip_monthly', cache=True, cache_scope='iteration')
def date(precip_monthly):
    """Compute the ``date`` column of the ``precip_monthly`` table.

    Each row's ``month`` and ``year`` values are joined as
    "<month> <year>" and parsed with the format ``'%b %Y'``.

    Returns
    -------
    pandas.Series
        Parsed datetimes, indexed like ``precip_monthly``.

    Raises
    ------
    ValueError
        If a "<month> <year>" string does not match ``'%b %Y'``.
    """
    df = precip_monthly.to_frame(columns=['year', 'month'])
    # Vectorized parse instead of a row-wise ``df.apply(..., axis=1)``:
    # it is faster and, unlike ``apply``, still yields a Series (not a
    # DataFrame) when the table is empty. ``astype(str)`` reproduces the
    # text that ``'{} {}'.format(month, year)`` would have built.
    month_year = df['month'].astype(str) + ' ' + df['year'].astype(str)
    return pd.to_datetime(month_year, format='%b %Y')
@orca.step()
def concat_monthly(precip_monthly):
    """Accumulate this iteration's monthly rows in the ``monthly_df`` injectable.

    On the first call the current ``precip_monthly`` frame is stored
    as-is; on later calls it is concatenated onto the stored frame with
    a fresh integer index.
    """
    combined = precip_monthly.to_frame()
    if orca.is_injectable('monthly_df'):
        previous = orca.get_injectable('monthly_df')
        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat``
        # is the supported equivalent and works on older versions too.
        combined = pd.concat([previous, combined], ignore_index=True)
    orca.add_injectable('monthly_df', combined)
@orca.step()
def concat_yearly(precip_yearly):
    """Accumulate this iteration's yearly rows in the ``yearly_df`` injectable.

    On the first call the current ``precip_yearly`` frame is stored
    as-is; on later calls it is concatenated onto the stored frame with
    a fresh integer index.
    """
    combined = precip_yearly.to_frame()
    if orca.is_injectable('yearly_df'):
        previous = orca.get_injectable('yearly_df')
        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat``
        # is the supported equivalent and works on older versions too.
        combined = pd.concat([previous, combined], ignore_index=True)
    orca.add_injectable('yearly_df', combined)
def main():
    """Run the pipeline for 1987-2014 and write the results to CSV.

    Executes the ``concat_monthly`` and ``concat_yearly`` steps once per
    year, then writes the accumulated ``monthly_df`` and ``yearly_df``
    injectables to ``precip_monthly.csv`` and ``precip_yearly.csv``
    (without the index column) in the current working directory.
    """
    orca.run(['concat_monthly', 'concat_yearly'], iter_vars=range(1987, 2015))

    outputs = (
        ('monthly_df', 'precip_monthly.csv'),
        ('yearly_df', 'precip_yearly.csv'),
    )
    for injectable_name, csv_path in outputs:
        orca.get_injectable(injectable_name).to_csv(csv_path, index=False)


if __name__ == '__main__':
    # main() returns None, so a normal run exits with status 0.
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment