Skip to content

Instantly share code, notes, and snippets.

@DanChaltiel
Last active March 16, 2022 11:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DanChaltiel/0842f53833e5000479834b5571e37d0e to your computer and use it in GitHub Desktop.
Save DanChaltiel/0842f53833e5000479834b5571e37d0e to your computer and use it in GitHub Desktop.
Read nQuery file
if(!require(tidyverse) || !require(xml2)){
stop("Packages `tidyverse` and `xml2` are required for read_nquery() to work.\n",
'Please run `install.packages(c("tidyverse", "xml2", "devtools"))` before trying again.')
}
#' Read nQuery files
#'
#' @param file the `.nqt` file
#' @param na_col a column on which to filter NA. Most likely "Alpha"
#' @param remove_duplicates should we remove duplicate lines (mostly in Side Tables)?
#'
#' @return a list of `test_name`, `results`, and `sidetable` as an object of class `nQueryTable`
#' @import tidyverse xml2
#'
#' @examples
#' x = read_nquery("my_file.nqt", na_col="Alpha")
#' print(x)
read_nquery = function(file, na_col=NULL, remove_duplicates=TRUE){
xmlfile=read_xml(file)
# htmltidy::xml_view(xmlfile)
x = list()
class(x) = "nQueryTable"
x$test_name = xml_child(xmlfile, "Test") %>% xml_attr("name")
get_results = function(){
rtn = xml_find_first(xmlfile, ".//Data") %>% xml_children() %>% map_dfc(~{
stopifnot(xml_name(.x)=="Row")
.rules = .x %>% xml_child("Parameter") %>% xml_child("Rules") %>% xml_children()
label = .rules %>% xml_attr("name")
message = .rules %>% xml_attr("message")
value = .x %>% xml_child("Cells") %>% xml_children() %>% xml_attr("value") %>%
type.convert(as.is=TRUE)
attr(value, "message") = message
attr(value, "label") = label
# class(value) = c("labelled", class(value))
name = .x %>% xml_child("Parameter") %>% xml_attr("name")
tibble(value) %>% set_names(name)
})
if(!is.null(na_col)){
if(!is.character(na_col)) stop("`na_col` must be a string")
rtn = filter(rtn, !is.na(!!ensym(na_col)))
}
if(remove_duplicates){
rtn = unique(rtn)
}
rtn
}
get_sidetable = function(){
rtn = xml_find_first(xmlfile, ".//SideTables") %>%
xml_children() %>%
map_dfr(function(s_table){
try({
s_name = s_table %>% xml_attr("caption")
s_table %>% xml_child("Rows") %>% xml_children() %>% map_dfc(~{
name = .x %>% xml_child("Parameter") %>% xml_attr("name")
stopifnot(.x %>% xml_child("Cells") %>% length() == 2)
xx = .x %>% xml_child("Cells") %>% xml_children()
value = xml_attr(xx, "value")[xml_attr(xx, "id")=="C1"] %>%
type.convert(as.is=TRUE)
tibble(value) %>% set_names(name)
}) %>% mutate(name=s_name, .before=1)
})
}) %>%
filter(if_all(.fns = ~!is.na(.x)))
if(remove_duplicates){
is_dup = duplicated(select(rtn, -name))
attr(rtn, "duplicates") = sum(is_dup)
rtn = filter(rtn, !is_dup)
}
rtn
}
x$results = try(get_results())
x$sidetable = try(get_sidetable())
x
}
print.nQueryTable = function(x){
cat(glue::glue("-- nQuery file: {x$test_name} -- "), "\n")
print(x$results, row_numbers=FALSE)
cat(glue::glue("-- Side Tables -- (removed {dups} duplicates) \n",
dups=attr(x$sidetable, "duplicates")))
print(x$sidetable, row_numbers=FALSE)
}
@DanChaltiel
Copy link
Author

DanChaltiel commented Jun 1, 2021

The easiest way to use this function is to simply run devtools::source_gist("0842f53833e5000479834b5571e37d0e").

More functionalities are to come if needed.

Tests:

  • working for: STT0
  • not working for: POC6

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment