@djhocking
Last active August 29, 2015 14:13
Big Queries and collect with dplyr
# fetch temperature data
tbl_values <- left_join(tbl_series,
                        select(tbl_variables, variable_id, variable_name),
                        by = c('variable_id' = 'variable_id')) %>%
  select(-file_id) %>%
  filter(location_id %in% df_locations$location_id,
         variable_name == "TEMP") %>%
  # tbl_values on the right-hand side is the existing remote values table
  left_join(tbl_values,
            by = c('series_id' = 'series_id')) %>%
  left_join(select(tbl_locations, location_id, location_name, latitude, longitude,
                   featureid = catchment_id),
            by = c('location_id' = 'location_id')) %>%
  left_join(tbl_agencies,
            by = c('agency_id' = 'agency_id')) %>%
  mutate(year = date_part('year', datetime))

df_values <- collect(tbl_values)
df_values <- df_values %>%
  mutate(datetime = with_tz(datetime, tzone = 'EST'))
summary(df_values)
# create climateData input dataset (too big without pre-filter or smaller join)
# tried to do the year filter within the postgres tbl query but was getting
# errors with it recognizing the derived year column
climate <- tbl_daymet %>%
  mutate(year = date_part('year', date)) %>%
  filter(featureid %in% df_locations$featureid)

tbl_climate <- climate %>%
  filter(year %in% unique(df_values$year)) # distinct() doesn't work on numeric values

climateData <- collect(tbl_climate)
@djhocking

> tbl_climate$query
<Query> SELECT "featureid", "date", "tmax", "tmin", "prcp", "dayl", "srad", "vp", "swe", DATE_PART('year', "date") AS "year"
FROM "daymet"
WHERE "featureid" IN (NULL, NULL, NULL, NULL, NULL, NULL, 831777, NULL, 831777, 818865, 823965, 817228, 818552, 844517,

...

749089, 740783) AND "year" IN UNIQUE((2005.0, 2005.0, ... , 2006.0, 2006.0))
<PostgreSQLConnection:(2376,1)> 

@walkerjeffd

Hmm, why so many nulls in the featureid list? All locations should be associated with a featureid. Let me check that out.

@walkerjeffd

And it looks like something weird happens with the UNIQUE. dplyr may not be handling this right; I'm not sure. You could try creating the list of unique values outside the filter() call:

unique_years <- as.integer(unique(df_values$year))
tbl_climate <- climate %>%
  filter(year %in% unique_years)

climateData <- collect(tbl_climate)
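As a quick sanity check on toy data (values chosen to mirror the duplicated 2005.0/2006.0 entries in the query above), as.integer(unique(...)) yields a plain integer vector, which dplyr can inline directly into the generated SQL IN clause instead of trying to translate unique() into a SQL UNIQUE(...) call:

```r
# toy stand-in for the year column of the collected values
years <- c(2005.0, 2005.0, 2006.0, 2006.0)

# computing the unique set in R, before the filter(), avoids the translation problem
unique_years <- as.integer(unique(years))
unique_years
# [1] 2005 2006
```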

@walkerjeffd

Oh, I know why there are nulls: the catchment_id is not currently being set when someone adds a new location, so the locations that are null have all been created in the past week or two.

@walkerjeffd

Ok, I just updated the locations table to fill in the missing catchment_ids, so you shouldn't get any more nulls in locations.catchment_id.

@djhocking

Great, thanks. What about creating a unique site-year combination in postgres before I collect it? That is really what I want: a query of the site-year combinations from the values table. I was hoping something like mutate(siteyear = featureid || '-' || year) would work, but to no avail.

@djhocking

I would use paste, but I assume it doesn't work when doing the postgres query. I could do it in the data frame after collecting the query, but the collect is going to take hours, so I wanted to make it as small as possible.
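For what it's worth, the post-collect route can be sketched on a toy data frame (the column names here are assumptions standing in for the collected values table):

```r
# toy stand-in for the collected values data frame
df_values <- data.frame(
  featureid = c(740020, 740020, 746011),
  year      = c(2005, 2005, 2006)
)

# paste() works fine once the data are local in R
df_values$siteyear <- paste(df_values$featureid, df_values$year, sep = "-")
unique(df_values$siteyear)
# [1] "740020-2005" "746011-2006"
```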

@djhocking

When I do the unique outside of the query it still gives the error:

Error in postgresqlExecStatement(conn, statement, ...) : 
  RS-DBI driver: (could not Retrieve the result : ERROR:  column "year" does not exist
LINE 3: ...9688, 750755, 750240, 747527, 749089, 740783) AND "year" IN ...
                                                             ^
)
In addition: Warning message:
In postgresqlQuickSQL(conn, statement, ...) :
  Could not create execute: SELECT count(*) FROM (SELECT "featureid", "date", "tmax", "tmin", "prcp", "dayl", "srad", "vp", "swe", DATE_PART('year', "date") AS "year"
FROM "daymet"
WHERE "featureid" IN (751988, 892785, 751071, 751326, 752235, 750688, 831777, 752527, 831777, 818865, 823965, 817228, 818552, 844517, 842601, 836717, 831211, 830038, 834069, 821646, 821232, 820519, 822376, 831406, 837021, 834836, 835015, 831556, 847590, 850363, 848456, 823248, 823105, 818739, 818995, 827206, 823447, 842681, 844720, 836877, 848226, 847984, 852898, 832460, 834868, 827625, 821590, 829471, 830232, 817498, 842827, 836579, 839353, 848112, 834432, 829941, 832740, 832740, 827155, 827155, 831625, 831204, 846512, 838486, 852052, 838187, 836698, 838152, 838495, 828220, 827384, 826628, 826628, 826800, 837836, 837836, 836195, 836195, 830802, 827155, 827155, 827155, 832532, 831586, 836683, 842041, 842926, 841984, 837476, 845441, 826973, 837610, 849419, 825153, 823803, 819869, 830736, 830598, 829474, 830778, [... truncated]

@djhocking

> tbl_climate$query
<Query> SELECT "featureid", "date", "tmax", "tmin", "prcp", "dayl", "srad", "vp", "swe", DATE_PART('year', "date") AS "year"
FROM "daymet"
WHERE "featureid" IN (751988, 892785, 751071, 751326, 752235, 750688, 831777, 752527, 831777, 818865, 823965, 817228, 818552, 844517, 842601, 836717, 831211, 830038, 834069, 821646, 821232, 820519, 822376, 831406, 837021, 834836, 835015, 831556, 847590, 850363, 848456, 823248, 823105, 818739, 818995, 827206, 823447, 842681, 844720, 836877, 848226, 847984, 852898, 832460, 834868, 827625, 821590, 829471, 830232, 817498, 842827, 836579, 839353, 848112, 834432, 829941, 832740, 832740, 827155, 827155, 831625, 831204, 846512, 838486, 852052, 838187, 836698, 838152, 838495, 828220, 827384, 826628, 826628, 826800, 837836, 837836, 836195, 836195, 830802, 827155, 827155, 827155, 832532, 831586, 836683, 842041, 842926, 841984, 837476, 845441, 826973, 837610, 849419, 825153, 823803, 819869, 830736, 830598, 829474, 830778, 822994, 823054, 826053, 845784, 844333, 842739, 842037, 842322, 830769, 833192, 853850, 852906, 851490, 850789, 845245, 833354, 829948, 832450, 833118, 833001, 834912, 831928, 847662, 847872, 824708, 824972, 825012, 825741, 825310, 825347, 822962, 844993, 844993, 844993, 845700, 845700, 845700, 829145, 829145, 830830, 830482, 830344, 831967, 831456, 833244, 831293, 833368, 848385, 848385, 842700, 842700, 853228, 848611, 848612, 848898, 852744, 834816, 850788, 851773, 854457, 844127, 845232, 848605, 846189, 844862, 845691, 848103, 823950, 831270, 821687, 828249, 826326, 824368, 827041, 829634, 828444, 825361, 827041, 830633, 816747, 818720, 819838, 820570, 822347, 817886, 822156, 819969, 823281, 822378, 821005, 836217, 834443, 832872, 831927, 828738, 837596, 820907, 820386, 812717, 815619, 814460, 820786, 817714, 833910, 834122, 833922, 834122, 833922, 834183, 834122, 753616, 753616, 752852, 752852, 752130, 751771, 751771, 751146, 750708, 750345, 750190, 750190, 750190, 749887, 749313, 748691, 753350, 753240, 751085, 751326, 750755, 751397, 751397, 751397, 751928, 751889, 751889, 751507, 751507, 751183, 751072, 750893, 750893, 750893, 750687, 750687, 750357, 
750357, 751448, 750991, 751362, 747621, 748692, 748738, 748521, 748279, 748236, 748692, 750302, 746598, 747594, 747933, 747074, 747074, 747074, 747074, 746516, 746516, 746516, 749389, 748568, 748072, 748045, 750106, 750068, 746855, 746855, 749491, 749980, 749292, 749015, 748018, 748349, 748443, 747635, 747635, 747525, 747144, 748336, 747747, 747579, 747751, 747751, 747751, 747683, 747638, 747375, 747375, 747375, 747683, 746724, 746229, 749707, 749707, 749707, 749707, 747964, 747433, 746502, 746289, 746533, 747070, 747070, 747070, 747070, 747355, 747100, 747100, 747100, 747100, 746982, 745702, 746630, 746630, 746630, 746630, 746630, 746430, 746430, 746203, 746011, 746011, 746669, 745871, 745847, 746755, 740393, 740020, 744350, 749688, 750755, 750240, 747527, 749089, 740783) 
AND "year" IN (2005, 2006, 2007, 2008, 2009, 2010, 2012, 2004, 2003, 2002, 2001, 2000, 1999, 1998, 1997, 2011, 1995, 1994, 1993, 1996, 1992, 1991)
<PostgreSQLConnection:(2376,2)> 

@walkerjeffd

Yeah, I hear what you're saying about trying to make it as small as possible. I'll try to write a straight SQL query to do this; once we get that working, we can try to figure out if we can recreate it from dplyr. I suspect we're pushing the limits of dplyr's R->SQL translation here.

Not sure why you're still getting that error, the DATE_PART('year', "date") as "year" should be creating that column for the WHERE statement.

@walkerjeffd

Ok, here's what I've got so far. This query will create a table with columns [featureid, variable, year, n] listing the number of values for each unique combination of featureid, var.name, and year (and it only includes water temperature):

SELECT l.catchment_id AS featureid,
       DATE_PART('year', v.datetime) AS year,
       var.name as variable, count(v.value) AS N
FROM series s, values v, locations l, variables var
WHERE s.id = v.series_id AND s.location_id=l.id AND s.variable_id=var.id
  AND var.name='TEMP'
GROUP BY var.name, featureid, year;

Now just need to join this with daymet...
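For reference, the same grouped count can be sketched in dplyr on a toy in-memory table (column names assumed from the query above):

```r
library(dplyr)

# toy stand-in for the joined series/values/locations/variables tables
joined <- data.frame(
  featureid = c(740020, 740020, 746011),
  year      = c(2005, 2005, 2006),
  variable  = "TEMP",
  value     = c(10.2, 11.5, 9.8)
)

# count of values per variable/featureid/year, mirroring the GROUP BY above
counts <- joined %>%
  group_by(variable, featureid, year) %>%
  summarise(n = n())
```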

@walkerjeffd

FYI, I'm building a new index on the daymet table based on both featureid and date_part('year', daymet.date). This should drastically speed up these queries, since most daymet queries will involve filtering by both featureid and year. Cross your fingers...
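As a sketch, such an index might be created like this (the index name is an assumption; PostgreSQL allows the function call date_part('year', date) as an index expression alongside a plain column):

```sql
-- hypothetical definition of the multicolumn expression index on daymet
CREATE INDEX daymet_featureid_year_idx
  ON daymet (featureid, date_part('year', date));
```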

@walkerjeffd

Ok, I think this will work for getting the daily daymet values associated with only the catchments and years that have data. The first part (WITH loc_year AS (...)) is called a Common Table Expression (CTE) and basically creates a temporary table named loc_year that can then be used in the join with the daymet table. Note that this table is not saved in the database; it is only used by the query when it is run.

The loc_year CTE will be a table containing the unique combinations of featureid, year, and variable, along with the number of values (n). It is also filtered to include only water temperature (so the variable column is kind of moot). The query below also filters for only two featureids ('740020', '746011'), just for testing. You can remove the last line of the WHERE clause (AND l.catchment_id IN ('740020', '746011')) to retrieve all featureids with data.

WITH loc_year AS (
  SELECT l.catchment_id AS featureid,
         DATE_PART('year', v.datetime) AS year,
         var.name as variable, count(v.value) AS N
  FROM series s, values v, locations l, variables var
  WHERE s.id = v.series_id 
    AND s.location_id=l.id 
    AND s.variable_id=var.id
    AND var.name='TEMP'
    AND l.catchment_id IN ('740020', '746011')
  GROUP BY var.name, featureid, year
)

SELECT date_part('year', d.date) as year, d.featureid, ly.n as n_values,
       d.date, d.tmax, d.tmin, d.prcp, d.dayl, d.srad, d.vp, d.swe
FROM daymet d
INNER JOIN loc_year ly
ON d.featureid=ly.featureid
  AND date_part('year', d.date)=ly.year
ORDER BY d.featureid, d.date;

The multicolumn index (featureid, year) on daymet is still being created, which might take a few hours, but once it's ready this query should hopefully be at least somewhat faster.

Here's code to try running in R:

library(RPostgreSQL)

drv <- dbDriver("PostgreSQL")

# create connection
con <- dbConnect(drv, dbname="conte_dev", host="127.0.0.1", user="conte", password="conte")

# create sql query string
qry <- "WITH loc_year AS (
  SELECT l.catchment_id AS featureid,
         DATE_PART('year', v.datetime) AS year,
         var.name as variable, count(v.value) AS N
  FROM series s, values v, locations l, variables var
  WHERE s.id = v.series_id
    AND s.location_id=l.id
    AND s.variable_id=var.id
    AND var.name='TEMP'
    AND l.catchment_id IN ('740020', '746011')
  GROUP BY var.name, featureid, year
)

SELECT date_part('year', d.date) as year, d.featureid, ly.n as n_values,
       d.date, d.tmax, d.tmin, d.prcp, d.dayl, d.srad, d.vp, d.swe
FROM daymet d
INNER JOIN loc_year ly
  ON d.featureid=ly.featureid
  AND date_part('year', d.date)=ly.year
ORDER BY d.featureid, d.date;"

# submit query
result <- dbSendQuery(con, qry)

# fetch results (n=-1 means return all rows, use n=5 to return just first 5 rows, for example)
df <- fetch(result, n=-1)

# check that each featureid has one or more complete years of daily daymet values
table(df$year, df$featureid)

# plot
library(ggplot2)
theme_set(theme_bw())

ggplot(df, aes(date, tmax)) +
  geom_line() +
  facet_wrap(~featureid)

# close the result set and connection when done
dbClearResult(result)
dbDisconnect(con)
