DanChaltiel/read_nquery.R

## read_nquery.R

if(!require(tidyverse) || !require(xml2)){
  stop("Packages `tidyverse` and `xml2` are required for read_nquery() to work.\n",
       'Please run `install.packages(c("tidyverse", "xml2", "devtools"))` before trying again.')
}


#' Read nQuery files
#'
#' @param file the `.nqt` file
#' @param na_col a column on which to filter NA. Most likely "Alpha"
#' @param remove_duplicates should we remove duplicate lines (mostly in Side Tables)?
#'
#' @return a list of `test_name`, `results`, and `sidetable` as an object of class `nQueryTable`
#' @import tidyverse xml2
#'
#' @examples
#' x = read_nquery("my_file.nqt", na_col="Alpha")
#' print(x)
read_nquery = function(file, na_col=NULL, remove_duplicates=TRUE){

  xmlfile=read_xml(file)
  # htmltidy::xml_view(xmlfile)
  x = list()
  class(x) = "nQueryTable"

  x$test_name = xml_child(xmlfile, "Test") %>% xml_attr("name")

  get_results = function(){
    rtn = xml_find_first(xmlfile, ".//Data") %>% xml_children() %>% map_dfc(~{
      stopifnot(xml_name(.x)=="Row")

      .rules = .x %>% xml_child("Parameter") %>% xml_child("Rules") %>% xml_children()
      label = .rules %>% xml_attr("name")
      message = .rules %>% xml_attr("message")
      value = .x %>% xml_child("Cells") %>% xml_children() %>% xml_attr("value") %>%
        type.convert(as.is=TRUE)
      attr(value, "message") = message
      attr(value, "label") = label
      # class(value) = c("labelled", class(value))

      name = .x %>% xml_child("Parameter") %>% xml_attr("name")
      tibble(value) %>% set_names(name)
    })
    if(!is.null(na_col)){
      if(!is.character(na_col)) stop("`na_col` must be a string")
      rtn = filter(rtn, !is.na(!!ensym(na_col)))
    }
    if(remove_duplicates){
      rtn = unique(rtn)
    }
    rtn
  }

  get_sidetable = function(){
    rtn = xml_find_first(xmlfile, ".//SideTables") %>%
      xml_children() %>%
      map_dfr(function(s_table){
        try({
          s_name = s_table %>% xml_attr("caption")
          s_table %>% xml_child("Rows") %>% xml_children() %>% map_dfc(~{
            name = .x %>% xml_child("Parameter") %>% xml_attr("name")
            stopifnot(.x %>% xml_child("Cells") %>% length() == 2)
            xx = .x %>% xml_child("Cells") %>% xml_children()
            value = xml_attr(xx, "value")[xml_attr(xx, "id")=="C1"] %>%
              type.convert(as.is=TRUE)
            tibble(value) %>% set_names(name)
          }) %>% mutate(name=s_name, .before=1)
        })
      }) %>%
      filter(if_all(.fns = ~!is.na(.x)))

    if(remove_duplicates){
      is_dup = duplicated(select(rtn, -name))
      attr(rtn, "duplicates") = sum(is_dup)
      rtn = filter(rtn, !is_dup)
    }
    rtn
  }

  x$results = try(get_results())
  x$sidetable = try(get_sidetable())

  x
}

print.nQueryTable = function(x){
  cat(glue::glue("-- nQuery file: {x$test_name} -- "), "\n")
  print(x$results, row_numbers=FALSE)
  cat(glue::glue("-- Side Tables -- (removed {dups} duplicates) \n",
                 dups=attr(x$sidetable, "duplicates")))

  print(x$sidetable, row_numbers=FALSE)
}

	if(!require(tidyverse) \|\| !require(xml2)){
	stop("Packages `tidyverse` and `xml2` are required for read_nquery() to work.\n",
	'Please run `install.packages(c("tidyverse", "xml2", "devtools"))` before trying again.')
	}


	#' Read nQuery files
	#'
	#' @param file the `.nqt` file
	#' @param na_col a column on which to filter NA. Most likely "Alpha"
	#' @param remove_duplicates should we remove duplicate lines (mostly in Side Tables)?
	#'
	#' @return a list of `test_name`, `results`, and `sidetable` as an object of class `nQueryTable`
	#' @import tidyverse xml2
	#'
	#' @examples
	#' x = read_nquery("my_file.nqt", na_col="Alpha")
	#' print(x)
	read_nquery = function(file, na_col=NULL, remove_duplicates=TRUE){

	xmlfile=read_xml(file)
	# htmltidy::xml_view(xmlfile)
	x = list()
	class(x) = "nQueryTable"

	x$test_name = xml_child(xmlfile, "Test") %>% xml_attr("name")

	get_results = function(){
	rtn = xml_find_first(xmlfile, ".//Data") %>% xml_children() %>% map_dfc(~{
	stopifnot(xml_name(.x)=="Row")

	.rules = .x %>% xml_child("Parameter") %>% xml_child("Rules") %>% xml_children()
	label = .rules %>% xml_attr("name")
	message = .rules %>% xml_attr("message")
	value = .x %>% xml_child("Cells") %>% xml_children() %>% xml_attr("value") %>%
	type.convert(as.is=TRUE)
	attr(value, "message") = message
	attr(value, "label") = label
	# class(value) = c("labelled", class(value))

	name = .x %>% xml_child("Parameter") %>% xml_attr("name")
	tibble(value) %>% set_names(name)
	})
	if(!is.null(na_col)){
	if(!is.character(na_col)) stop("`na_col` must be a string")
	rtn = filter(rtn, !is.na(!!ensym(na_col)))
	}
	if(remove_duplicates){
	rtn = unique(rtn)
	}
	rtn
	}

	get_sidetable = function(){
	rtn = xml_find_first(xmlfile, ".//SideTables") %>%
	xml_children() %>%
	map_dfr(function(s_table){
	try({
	s_name = s_table %>% xml_attr("caption")
	s_table %>% xml_child("Rows") %>% xml_children() %>% map_dfc(~{
	name = .x %>% xml_child("Parameter") %>% xml_attr("name")
	stopifnot(.x %>% xml_child("Cells") %>% length() == 2)
	xx = .x %>% xml_child("Cells") %>% xml_children()
	value = xml_attr(xx, "value")[xml_attr(xx, "id")=="C1"] %>%
	type.convert(as.is=TRUE)
	tibble(value) %>% set_names(name)
	}) %>% mutate(name=s_name, .before=1)
	})
	}) %>%
	filter(if_all(.fns = ~!is.na(.x)))

	if(remove_duplicates){
	is_dup = duplicated(select(rtn, -name))
	attr(rtn, "duplicates") = sum(is_dup)
	rtn = filter(rtn, !is_dup)
	}
	rtn
	}

	x$results = try(get_results())
	x$sidetable = try(get_sidetable())

	x
	}

	print.nQueryTable = function(x){
	cat(glue::glue("-- nQuery file: {x$test_name} -- "), "\n")
	print(x$results, row_numbers=FALSE)
	cat(glue::glue("-- Side Tables -- (removed {dups} duplicates) \n",
	dups=attr(x$sidetable, "duplicates")))

	print(x$sidetable, row_numbers=FALSE)
	}