Skip to content

Instantly share code, notes, and snippets.

@t-kalinowski
Last active September 23, 2017 15:54
Show Gist options
  • Save t-kalinowski/05900156476a0c792623daa39175543b to your computer and use it in GitHub Desktop.
Save t-kalinowski/05900156476a0c792623daa39175543b to your computer and use it in GitHub Desktop.
Some helpers for package:units
# modified from http://adv-r.had.co.nz/dsl.html
.wrap_bare_symbols_with_make_unit_call <- function(x) {
if (is.atomic(x)) {
x
} else if (is.name(x)) {
chr <- as.character(x)
if(!udunits2::ud.is.parseable(chr))
stop("could not parse ", dbl_quote(chr))
bquote(units::make_unit(.(as.character(x))))
}
else if (is.call(x) || is.pairlist(x)) {
children <- lapply(x[-1], .wrap_bare_symbols_with_make_unit_call)
x[-1] <- children
x
} else {
stop("Don't know how to handle type ", typeof(x),
call. = FALSE)
}
}
.parse_chr_2_units <- function(chr) {
stopifnot(is.character(chr), length(chr) == 1L)
unit <- parse(text = chr)[[1]]
e <- try(unit <- .wrap_bare_symbols_with_make_unit_call(unit), silent = TRUE)
if(inherits(e, "try-error")) {
msg <- paste0('in unit "', chr,'", ', attr(e, "condition")$message)
return(stop(msg))
}
eval(unit, baseenv())
# list2env(list(make_unit = units::make_unit), parent = baseenv())) # #units::ud_units
}
#' make a unit object
#'
#' This is an alternative to units::make_unit with the following differences
#' \itemize{
#' \item not only unit symobls, but also valid unit names parse correctly
#' \item strings of compound units like \code{"ug/l"} parse into correct symyolic_units
#' objects, with the denominator correct. This allows for more robust unit
#' simplification in complex equations
#' \item both expressions and strings are accepted
#' \item throws an error if the supplied arguments do not construct a valid unit
#' parsable by \code{\link[udunits2]{ud.is.parsable}}
#' }
#'
#' @param x a string, or a bare expression that describes a valid unit. See
#' examples for usage
#' @param .SEonly perform standard evaluation only. If true, the supplied value
#' must be a scalar character, otherwise an error is thrown.
#'
#' @return an object of class `symbolic_units`, just like [units::make_unit]
#' @export
#'
#' @importFrom rlang enquo quo_text
#' @import units
#' @seealso units::make_unit .udunits_symbols_info
#'
#' @noMd
#' @examples
#' # different ways to specify units
#' # all 3 should be identical
#' make_unit2(ug/l)
#' make_unit2("ug/l")
#' string <- "ug/l"
#' make_unit2(string)
#'
#' # valid unit names not found in units::ud_units also parse
#' make_unit2(ug/gallon)
#' make_unit2("ug/gallon")
#' string <- "ug/gallon"
#' make_unit2(string)
#'
#' # the normal evaluation of the supplied argument is attempted first before
#' # inspecting supplied expression. The expression is only inspected if it does
#' # not resolve to a character vector"
#' ug <- "kilogram"
#' make_unit2(ug)
#' # note that even if one of the symbols is bound, NSE is done on the whole
#' # expression or not at all
#' make_unit2(ug/l)
#' # to avoid expression parsing, use argument .SEonly (e.g., defensive programmig
#' # inside a function)
#' # make_unit2(ug/l, .SEonly = TRUE) # ERROR
#' make_unit2("ug/l", .SEonly = FALSE) # the default
#' make_unit2(ug, .SEonly = TRUE)
#'
#' # some examples for how to convert units
#' # first assign units to a numeric, this makes a vector with units
#' x <- 1:3
#' (units(x) <- make_unit2(ug/l))
#' # then, assigning units to a vector with units performs conversion
#' set_units(x, make_unit2("mg/l"))
#' set_units(x, make_unit2("g/l"))
#' set_units(x, make_unit2("ug/tbsp"))
#' set_units(x, make_unit2(kg/US_liquid_gallon))
#' # `set_units(x, u)` is a pipe friendly equivelant version of `units(x) <- u`
#' (units(x) <- make_unit2("mg/l") )
#' units(x) <- make_unit2(ug/l)
#'
#' # reserved words like in and special characters like % and ' must be
#' # backticked if passed as bare expression, however they work just fine if
#' # passed as a character string.
#' make_unit2(`in`)
#' make_unit2("in")
#' make_unit2("%/gallon")
#' make_unit2("%/'")
#' make_unit2("'/%")
#'
#' # this is commented out because of a documentation difficulty with roxygen
#' # throwing errors about mismatched quotes, but this should work
#' # make_unit2(`'` / `%`)
#'
#'
#' make_unit2(`%`/gallon)
#'
#' make_unit2(`%`*T)
#' make_unit2(T/F)
#' # make_unit2(T/FALSE) # ERROR
#'
#' # attempting to convert between incompatable units throws an error
#' # units(x) <- make_unit2(ft) # ERROR
#' # not recognized units throw an error
#' # make_unit2(foo/bar) # ERROR
make_unit2 <- function(x, .SEonly = FALSE) {
require(units)
ex <- enquo(x)
o <- try(chr <- force(x), silent = TRUE)
if (!.SEonly && (!is.character(o) || inherits(o, "try-error")))
chr <- quo_text(ex)
stopifnot(is.character(chr), length(chr) == 1L)
reserved_word <-
"`?(%|'|\"|if|T|F|else|repeat|while|function|for|in|next|break|TRUE|FALSE|NULL|Inf|NaN|NA|NA_integer_|NA_real_|NA_complex_|NA_character_)`?" # removing \\b for now
# backtick reserved words and other characters that othwise might throw throw
# off parse(text = ) or resolve to something in the baseenv() (e.g., T). Don't
# doubleup backtick.
# FIXME: currently "kelvin" becomes "kelv`in`". Need to add a boundary check...
# simply adding \\b makes % and a few other characters fail though.
#
# NOTE: there are a bunch of other symbols in ?Syntax that perhaps should also
# be included here
#
# Question: why not just backtick everything?
chr <- gsub(reserved_word, "\\`\\1\\`", chr)
return(.parse_chr_2_units(chr))
}
# adapted from:
# https://github.com/edzer/units/blob/master/R/ud_units.R
# https://raw.githubusercontent.com/edzer/units/master/R/ud_units.R
# nocov start
# This is setup code and all fails if we do not do it and use the units
# in the list, so there are no explicit tests for this, thus the nocov
.get_ud_xml_dir <- function() {
requireNamespace("udunits2")
udunits2:::.onAttach() # prints out the filepath to the udunits2.xml database
# require(xml2)
udunits2_dir <- dirname(Sys.getenv("UDUNITS2_XML_PATH"))
if (udunits2_dir == "")
udunits2_dir <- "/usr/share/xml/udunits"
udunits2_dir
}
.read_ud_db_symbols <- function(dir, filename) {
if (! requireNamespace("xml2", quietly = TRUE))
stop("package xml2 required to create ud_units database")
database <- xml2::read_xml(file.path(dir, filename))
symbols <- xml2::xml_find_all(database, ".//symbol")
unlist(Map(function(node) as.character(xml2::xml_contents(node)), symbols))
}
.read_ud_db_scales <- function(dir, filename) {
if (! requireNamespace("xml2", quietly = TRUE))
stop("package xml2 required to create ud_units database")
database <- xml2::read_xml(file.path(dir, filename))
symbols <- xml2::xml_find_all(database, ".//value")
symbols
unlist(Map(function(node) as.numeric(as.character(xml2::xml_contents(node))), symbols))
}
.get_ud_symbols <- function() {
udunits2_dir <- .get_ud_xml_dir()
symbols <- c(.read_ud_db_symbols(udunits2_dir, "udunits2-base.xml"),
.read_ud_db_symbols(udunits2_dir, "udunits2-derived.xml"),
.read_ud_db_symbols(udunits2_dir, "udunits2-accepted.xml"),
.read_ud_db_symbols(udunits2_dir, "udunits2-common.xml"))
# symbols = symbols[symbols == make.names(symbols)]
## (this would drop "'" "\"" "%" "in")
symbols
}
.get_ud_prefixes <- function() {
udunits2_dir <- .get_ud_xml_dir()
.read_ud_db_symbols(udunits2_dir, "udunits2-prefixes.xml")
}
.construct_ud_units <- function(){
ud_prefixes <- .get_ud_prefixes()
ud_symbols <- .get_ud_symbols()
expand_with_prefixes <- function(symbol) paste(ud_prefixes, symbol, sep = "")
symbols <- unique(c(ud_symbols,
unlist(Map(expand_with_prefixes, ud_symbols), use.names = FALSE)))
ud_units <- Map(make_unit, symbols)
names(ud_units) <- symbols
ud_units
}
# Use this to generate the data
# ud_units <- .construct_ud_units()
# nocov end
# ------------ end of copy from
# --------- https://raw.githubusercontent.com/edzer/units/master/R/ud_units.R
# library(purrr)
# library(dplyr)
# library(TKutils)
`%|%` <- rlang::`%|%`
`%empty%` <- function(x, y) if(length(x)==0) y else x
.read_ud_db <- function(dir, filename) {
if (! requireNamespace("xml2", quietly = TRUE))
stop("package xml2 required to create ud_units database")
database <- xml2::read_xml(file.path(dir, filename))
# xml2::as_list(database)
database
}
.db_list_as_dataframe <- function(db) {
xml_nodes <- xml_children(db)
map_dfr(seq_len(xml_length(db)), function(i) {
unit <- xml_nodes[[i]]
symbols <- xml_find_all(unit, ".//symbol")
symbols <- xml_text(symbols) %empty% ""
symbol <- symbols[ 1]
symbol_aliases <- pcc(symbols[-1])
unit_names <- xml_find_all(unit, ".//name")
all_names <- unlist(map(unit_names, ~xml_text(xml_children(.x))))
singular <- xml_find_all(unit_names, ".//singular") %>% xml_text()
plural <- xml_find_all(unit_names, ".//plural") %>% xml_text()
name_singular <- singular[ 1] %|% ""
name_singular_aliases <- pcc(singular[-1]) %|% ""
name_plural <- plural[ 1] %|% ""
name_plural_aliases <- pcc(plural[-1]) %|% ""
def <- xml_find_all(unit, ".//def")
def <- xml_text(def) %empty% ""
definition <- xml_find_all(unit, ".//definition")
definition <- xml_text(definition) %empty% ""
comment <- xml_find_all(unit, ".//comment")
comment <- xml_text(comment) %empty% ""
dimensionless <- xml_find_all(unit, ".//dimensionless")
dimensionless <- as.logical(length(dimensionless))
# all node names that might be in a unit node
# db %>% xml_children() %>% map(~xml_children(.x) %>% xml_name()) %>%
# unique() %>% unlist() %>% unique()
# [1] "base" "name" "symbol"
# [4] "aliases" "definition" "def"
# [7] "comment" "dimensionless"
# rest_xml <- unit %>% xml_children()
# rest <- map(rest_xml, xml_text)
# names(rest) <- rest_xml %>% xml_name()
# rest <- list(rest)
tibble(symbol, symbol_aliases,
name_singular, name_singular_aliases,
name_plural, name_plural_aliases,
def, definition, comment, dimensionless) #, rest
})
}
.get_ud_db_all <- function() {
udunits2_dir <- .get_ud_xml_dir()
base <- .read_ud_db(udunits2_dir, "udunits2-base.xml") # 7
derv <- .read_ud_db(udunits2_dir, "udunits2-derived.xml") # 23
acpt <- .read_ud_db(udunits2_dir, "udunits2-accepted.xml") # 24
cmon <- .read_ud_db(udunits2_dir, "udunits2-common.xml") # 221
len <- sum(sapply(list(base, derv, acpt, cmon), xml_length))
bind_rows(
base = .db_list_as_dataframe(base),
derived = .db_list_as_dataframe(derv),
accepted = .db_list_as_dataframe(acpt),
common = .db_list_as_dataframe(cmon),
.id = "source_xml_table_name"
)
}
.get_ud_prefixes_xml <- function() {
udunits2_dir <- .get_ud_xml_dir()
.read_ud_db(udunits2_dir, "udunits2-prefixes.xml")
}
#' @name udunits-info
#' @export
.udunits_prefix_info <- function() {
pr <- .get_ud_prefixes_xml()
# all prefix valid names
# pr %>% xml_children() %>% map(~xml_children(.x) %>% xml_name()) %>%
# unlist() %>% unique()
# "value" "name" "symbol"
pr %>%
xml_children() %>%
map_dfr(function(prefix) {
symbols <- xml_find_all(prefix, ".//symbol") %>% xml_text()
symbol <- symbols[1]
symbol_aliases <- pcc(symbols[-1])
name <- xml_find_all(prefix, ".//name") %>% xml_text()
value <- xml_find_all(prefix, ".//value") %>% xml_double()
tibble(symbol, symbol_aliases, name, value)
})
}
#' Get information on valid units
#'
#' The returned dataframe is constructed at runtime by reading the xml database
#' that powers unit conversion in [package:udunits2]. Inspect this dataframe to
#' determine what inputs are accepted to `units::make_unit` or
#' `RemTools::make_units2`. Any entry listed as a `name` or `symbol` (including
#' alias names and symbols) are accepted. Additionaly, any symbols can also
#' contain a valid prefix.
#'
#' @return a data frame
#' @export
#'
#' @importFrom xml2 xml_children xml_find_all xml_text xml_length xml_double
#' @name udunits-info
#' @examples
#' .udunits_symbols_info()
#' .udunits_prefix_info()
.udunits_symbols_info <- function() {
.get_ud_db_all()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment