dblodgett-usgs/regrid_daymet.R

## regrid_daymet.R
library(sf)
library(ncdf4)
library(dplyr)
library(retry)
# Need: https://github.com/USGS-R/ncdfgeom
# Need: https://github.com/ramhiser/retry/

get_time_nc <- function(t_dim) {
  time_units<-strsplit(t_dim$units, " ")[[1]]
  time_step<-time_units[1]
  date_origin<-time_units[3]

  if(nchar(date_origin) == 10) {
    date_origin <- paste0(date_origin, "T00:00:00Z")
  }

  if(grepl("hour", time_step, ignore.case = TRUE)) {
    multiplier <- (60^2)
  } else if(grepl("day", time_step, ignore.case = TRUE)) {
    multiplier <- (24 * 60^2)
  } else {
    stop("only hour time steps supported so far")
  }

  origin <- as.POSIXct(strptime(date_origin,
                                format = "%Y-%m-%dT%H:%M:%SZ",
                                tz = "UTC"),
                       tz = "UTC")

  as.POSIXct(t_dim$vals * multiplier,
             origin = origin,
             tz = "UTC")
}

input_cells <- st_simplify(read_sf("SanJuanSimplify"), 100)

### Used for testing a smaller grid ###
# subset_box <- c(510000, 4050000, 530000, 4080000) %>%
#   setNames(c("xmin", "ymin", "xmax", "ymax"))
# class(subset_box) <- "bbox"
# subset_box <- st_as_sfc(subset_box)
# st_crs(subset_box) = st_crs(input_cells)
# input_cells <- st_intersection(input_cells, subset_box)
###

# Will not work with CRAN ncdf4 which is not compiled with OPeNDAP support.
dmt <- nc_open("https://thredds.daac.ornl.gov/thredds/dodsC/daymet-v3-agg/na.ncml")

# Uses: https://github.com/USGS-R/ncdfgeom
prj <- ncdfgeom::get_prj(ncatt_get(dmt, dmt$var$lambert_conformal_conic))

# Buffer out a bit to be sure we have all centroids in output.
bbox <- st_bbox(st_transform(input_cells, prj)) + c(-1000, -1000, 1000, 1000)

# Grab stuff in bounding box.
x_inds <- which(dmt$dim$x$vals > bbox$xmin & dmt$dim$x$vals < bbox$xmax)
y_inds <- which(dmt$dim$y$vals > bbox$ymin & dmt$dim$y$vals < bbox$ymax)

# Finicky conversion to matrix and adding an id.
x_vals <- matrix(dmt$dim$x$vals[x_inds],
                 nrow = length(y_inds), ncol = length(x_inds),
                 byrow = T)
y_vals <- matrix(dmt$dim$y$vals[y_inds],
                 nrow = length(y_inds), ncol = length(x_inds),
                 byrow = F)
ids <- matrix(seq(1, length(x_inds) * length(y_inds)),
              nrow = length(y_inds), ncol = length(x_inds),
              byrow = T)

dmt_points <- st_as_sf(data.frame(x = matrix(x_vals, ncol = 1),
                                  y = matrix(y_vals, ncol = 1),
                                  id = matrix(ids, ncol = 1)),
                       coords = c("x", "y"), crs = prj, agr = "constant")

# Buffer out half a grid cell for intersection ov voronoi.
correct_bbox <- st_bbox(dmt_points) + c(-500, -500, +500, +500)

# Build Cell Geometry
dmt_polygons <- st_geometry(dmt_points) %>%
  st_union() %>%
  st_voronoi() %>%
  st_cast() %>%
  st_intersection(st_as_sfc(correct_bbox)) %>%
  st_sf()

dmt_polygons <- st_join(dmt_polygons, dmt_points, join = st_contains) %>%
  select(dmt_id = id)

input_cells <- st_transform(input_cells, st_crs(dmt_polygons)) %>%
  select(input_id = OBJECTID)

intersection <- st_intersection(dmt_polygons, input_cells)

write_sf(dmt_points, "sj_dmt_points.gpkg")
write_sf(dmt_polygons, "sj_dmt_poly.gpkg")
write_sf(input_cells, "sj_input_poly.gpkg")
write_sf(intersection, "sj_intersected_poly.gpkg")

# Get individual intersection areas. (as.numeric removes units)
intersection <- intersection %>%
  mutate(area = as.numeric(st_area(intersection))) %>%
  group_by(input_id) %>%
  st_set_geometry(NULL)

# Get sum of area by input_id and join to intersection.
id_area <- summarise(intersection, id_area = sum(area))
intersection <- left_join(intersection, id_area, by = "input_id")

# Calculate percent of id_area of each incremental area.
intersection <- mutate(intersection, percent_covers = area/id_area) %>%
  select(-area, -id_area) %>% ungroup()

# How many steps to run.
n_steps <- 100 # All is dmt$dim$time$len
# Setup output matrix
out <- matrix(nrow = n_steps, ncol = length(input_cells$input_id))

# Step through time steps here.
# Could do this in a few ways to improve scalability.
# If one time step is small, could ncvar_get many time steps at a time and unpack.
# If all time steps in out is too large, could write output incrementally.
for(i in 1:n_steps) {
  # One timestep at a time.
  retry:::try_backoff({
    dmt_data <- ncvar_get(dmt, dmt$var$tmin,
                          start = c(min(x_inds), min(y_inds), i),
                          count = c(length(x_inds), length(y_inds), 1))
  })

  # Unpack dmt_data and add id
  dmt_data <- data.frame(d = matrix(t(dmt_data), ncol = 1, byrow = TRUE),
                         dmt_id = matrix(ids, ncol = 1))

  # Only needed for plotting output.
  dmt_p <- dmt_data

  dmt_data <- right_join(dmt_data, intersection,
                         by = c("dmt_id")) %>%
    mutate(d = d * percent_covers) %>%
    group_by(input_id) %>%
    summarise(d = sum(d, na.rm = T)/sum(percent_covers, na.rm = T)) %>%
    ungroup()

  # Append to output
  out[i,] <- dmt_data$d

  print(i)
}

time_steps <- get_time_nc(dmt$dim$time)

# Write output for gsflow
cat("daymet data test\n", file = "daymet_test.cbh")
cat(paste0("tmin ", length(input_cells$input_id), "\n"),
    file = "daymet_test.cbh", append = TRUE)
cat("########################################",
    file = "daymet_test.cbh", append = TRUE)

for(i in 1:n_steps) {
cat(paste0(paste(format(time_steps[i], format = "%Y %m %d"), "0 0 0 "),
          paste(round(out[i,], digits = 2), collapse = " "), "\n"),
    file = "daymet_test.cbh", append = TRUE)
}

# plot(st_geometry(dmt_polygons))
# plot(st_transform(st_geometry(input_cells), st_crs(dmt_polygons)), add = T)

# Lots of checking here. Only use with 1 time step above.
png("dmt_grid.png")
dmt_p <- left_join(dmt_polygons, dmt_p)
plot(dmt_p["d"], border = NA)
dev.off()

write_sf(dmt_p, "sj_dmt_check.gpkg")

png("sj_grid.png")
pd <- st_transform(left_join(input_cells, dmt_data), st_crs(dmt_polygons))
plot(pd["d"], border = NA)
dev.off()

# Use to check order of output text file.
write_sf(pd, "sj_dmt_out.gpkg")
#
# plot(st_geometry(dmt_p))
# plot(st_geometry(pd), add = TRUE)
	library(sf)
	library(ncdf4)
	library(dplyr)
	library(retry)
	# Need: https://github.com/USGS-R/ncdfgeom
	# Need: https://github.com/ramhiser/retry/

	get_time_nc <- function(t_dim) {
	time_units<-strsplit(t_dim$units, " ")[[1]]
	time_step<-time_units[1]
	date_origin<-time_units[3]

	if(nchar(date_origin) == 10) {
	date_origin <- paste0(date_origin, "T00:00:00Z")
	}

	if(grepl("hour", time_step, ignore.case = TRUE)) {
	multiplier <- (60^2)
	} else if(grepl("day", time_step, ignore.case = TRUE)) {
	multiplier <- (24 * 60^2)
	} else {
	stop("only hour time steps supported so far")
	}

	origin <- as.POSIXct(strptime(date_origin,
	format = "%Y-%m-%dT%H:%M:%SZ",
	tz = "UTC"),
	tz = "UTC")

	as.POSIXct(t_dim$vals * multiplier,
	origin = origin,
	tz = "UTC")
	}

	input_cells <- st_simplify(read_sf("SanJuanSimplify"), 100)

	### Used for testing a smaller grid ###
	# subset_box <- c(510000, 4050000, 530000, 4080000) %>%
	# setNames(c("xmin", "ymin", "xmax", "ymax"))
	# class(subset_box) <- "bbox"
	# subset_box <- st_as_sfc(subset_box)
	# st_crs(subset_box) = st_crs(input_cells)
	# input_cells <- st_intersection(input_cells, subset_box)
	###

	# Will not work with CRAN ncdf4 which is not compiled with OPeNDAP support.
	dmt <- nc_open("https://thredds.daac.ornl.gov/thredds/dodsC/daymet-v3-agg/na.ncml")

	# Uses: https://github.com/USGS-R/ncdfgeom
	prj <- ncdfgeom::get_prj(ncatt_get(dmt, dmt$var$lambert_conformal_conic))

	# Buffer out a bit to be sure we have all centroids in output.
	bbox <- st_bbox(st_transform(input_cells, prj)) + c(-1000, -1000, 1000, 1000)

	# Grab stuff in bounding box.
	x_inds <- which(dmt$dim$x$vals > bbox$xmin & dmt$dim$x$vals < bbox$xmax)
	y_inds <- which(dmt$dim$y$vals > bbox$ymin & dmt$dim$y$vals < bbox$ymax)

	# Finicky conversion to matrix and adding an id.
	x_vals <- matrix(dmt$dim$x$vals[x_inds],
	nrow = length(y_inds), ncol = length(x_inds),
	byrow = T)
	y_vals <- matrix(dmt$dim$y$vals[y_inds],
	nrow = length(y_inds), ncol = length(x_inds),
	byrow = F)
	ids <- matrix(seq(1, length(x_inds) * length(y_inds)),
	nrow = length(y_inds), ncol = length(x_inds),
	byrow = T)

	dmt_points <- st_as_sf(data.frame(x = matrix(x_vals, ncol = 1),
	y = matrix(y_vals, ncol = 1),
	id = matrix(ids, ncol = 1)),
	coords = c("x", "y"), crs = prj, agr = "constant")

	# Buffer out half a grid cell for intersection ov voronoi.
	correct_bbox <- st_bbox(dmt_points) + c(-500, -500, +500, +500)

	# Build Cell Geometry
	dmt_polygons <- st_geometry(dmt_points) %>%
	st_union() %>%
	st_voronoi() %>%
	st_cast() %>%
	st_intersection(st_as_sfc(correct_bbox)) %>%
	st_sf()

	dmt_polygons <- st_join(dmt_polygons, dmt_points, join = st_contains) %>%
	select(dmt_id = id)

	input_cells <- st_transform(input_cells, st_crs(dmt_polygons)) %>%
	select(input_id = OBJECTID)

	intersection <- st_intersection(dmt_polygons, input_cells)

	write_sf(dmt_points, "sj_dmt_points.gpkg")
	write_sf(dmt_polygons, "sj_dmt_poly.gpkg")
	write_sf(input_cells, "sj_input_poly.gpkg")
	write_sf(intersection, "sj_intersected_poly.gpkg")

	# Get individual intersection areas. (as.numeric removes units)
	intersection <- intersection %>%
	mutate(area = as.numeric(st_area(intersection))) %>%
	group_by(input_id) %>%
	st_set_geometry(NULL)

	# Get sum of area by input_id and join to intersection.
	id_area <- summarise(intersection, id_area = sum(area))
	intersection <- left_join(intersection, id_area, by = "input_id")

	# Calculate percent of id_area of each incremental area.
	intersection <- mutate(intersection, percent_covers = area/id_area) %>%
	select(-area, -id_area) %>% ungroup()

	# How many steps to run.
	n_steps <- 100 # All is dmt$dim$time$len
	# Setup output matrix
	out <- matrix(nrow = n_steps, ncol = length(input_cells$input_id))

	# Step through time steps here.
	# Could do this in a few ways to improve scalability.
	# If one time step is small, could ncvar_get many time steps at a time and unpack.
	# If all time steps in out is too large, could write output incrementally.
	for(i in 1:n_steps) {
	# One timestep at a time.
	retry:::try_backoff({
	dmt_data <- ncvar_get(dmt, dmt$var$tmin,
	start = c(min(x_inds), min(y_inds), i),
	count = c(length(x_inds), length(y_inds), 1))
	})

	# Unpack dmt_data and add id
	dmt_data <- data.frame(d = matrix(t(dmt_data), ncol = 1, byrow = TRUE),
	dmt_id = matrix(ids, ncol = 1))

	# Only needed for plotting output.
	dmt_p <- dmt_data

	dmt_data <- right_join(dmt_data, intersection,
	by = c("dmt_id")) %>%
	mutate(d = d * percent_covers) %>%
	group_by(input_id) %>%
	summarise(d = sum(d, na.rm = T)/sum(percent_covers, na.rm = T)) %>%
	ungroup()

	# Append to output
	out[i,] <- dmt_data$d

	print(i)
	}

	time_steps <- get_time_nc(dmt$dim$time)

	# Write output for gsflow
	cat("daymet data test\n", file = "daymet_test.cbh")
	cat(paste0("tmin ", length(input_cells$input_id), "\n"),
	file = "daymet_test.cbh", append = TRUE)
	cat("########################################",
	file = "daymet_test.cbh", append = TRUE)

	for(i in 1:n_steps) {
	cat(paste0(paste(format(time_steps[i], format = "%Y %m %d"), "0 0 0 "),
	paste(round(out[i,], digits = 2), collapse = " "), "\n"),
	file = "daymet_test.cbh", append = TRUE)
	}

	# plot(st_geometry(dmt_polygons))
	# plot(st_transform(st_geometry(input_cells), st_crs(dmt_polygons)), add = T)

	# Lots of checking here. Only use with 1 time step above.
	png("dmt_grid.png")
	dmt_p <- left_join(dmt_polygons, dmt_p)
	plot(dmt_p["d"], border = NA)
	dev.off()

	write_sf(dmt_p, "sj_dmt_check.gpkg")

	png("sj_grid.png")
	pd <- st_transform(left_join(input_cells, dmt_data), st_crs(dmt_polygons))
	plot(pd["d"], border = NA)
	dev.off()

	# Use to check order of output text file.
	write_sf(pd, "sj_dmt_out.gpkg")
	#
	# plot(st_geometry(dmt_p))
	# plot(st_geometry(pd), add = TRUE)