@IdoBar
Last active October 12, 2023 05:37
A set of utilities to make working in R easier
# util.R:
# Utilities to make R a happier place
# Originally created by Brendan O'Connor, brenocon.com/code
# Edited by Ido Bar.
# Source the file directly from this gist using `devtools::source_gist("7f63547158ecdbacf31b54a58af0d1cc", filename = "util.R")`
########################################
## Put everything into an environment, to not pollute global namespace
util = new.env()
########################################
## Better I/O routines
util$repath <- function(x, to=.Platform$OS.type) {
xa <- ifelse(to=="windows", gsub('/', '\\\\', x), gsub('\\\\', '/', x))
return(xa)
}
# Search for a file recursively across a set of paths (defaults to the directories in the RPATH environment variable)
util$search.path <- function(fn,
paths = strsplit(chartr("\\", "/", Sys.getenv("RPATH")), split =
switch(.Platform$OS.type, windows = ";", ":"))[[1]]) {
for(d in paths)
if (file.exists(f <- file.path(d, fn)))
return(f)
return(NULL)
}
# Find the most recent file in a folder that matches a pattern
# Usage: recent_file("./data/intermediate_files", glue::glue(".*{stacks_name}.+\\.clean.vcf"))
util$recent_file <- function(filedir, pattern, sort_by="modified"){
files <- file.info(dir(filedir, pattern, full.names = TRUE))
files$filename <- rownames(files)
# files[[paste0(substr(sort_by,1,1), "time")]] <- as.POSIXct(files[[paste0(substr(sort_by,1,1), "time")]])
files[order(files[[paste0(substr(sort_by,1,1), "time")]] , decreasing = TRUE),"filename"][1]
}
# deprecated in favour of readr::read_tsv() function from tidyverse
# util$read.tsv <- function(..., header=F, sep='\t', quote='', comment='', na.strings='', stringsAsFactors=FALSE) {
# read.table() wrapper with default settings for no-nonsense, pure TSV
# Typical use case is output from another program.
# (R's defaults are more geared for human-readable datafiles, which is less
# feasible for large-scale data anyway.)
# These options are substantially faster than read.table() defaults.
# (see e.g. LINK)
# stringsAsFactors is the devil.
# args = list(...)
# args$header = header
# if (!is.null(args$col.names)) {
# read.delim() is not smart about this. Yikes.
# args$header = FALSE
# }
# args$sep = sep
# args$quote = quote
# args$comment = comment
# args$stringsAsFactors = stringsAsFactors
# args$na.strings = na.strings
# do.call(read.delim, args)
# }
# deprecated in favour of readr::write_tsv() function from tidyverse
# util$write.tsv <- function(..., header=NA, col.names=F, row.names=F, sep='\t', na='', quote=F) {
# 'header' to 'col.names' naming consistency with read.table()
# if (is.finite(header)) col.names = header
# write.table(..., col.names=col.names, row.names=row.names, sep=sep, na=na, quote=quote)
# }
# Write a two-column table of ids and sequences to a FASTA file
util$write_fasta <- function(id_seq_table, line_width=60, filename){
file.create(filename)
apply(id_seq_table, 1, function(entry) cat(sprintf(">%s\n%s\n\n", entry[1],
gsub(sprintf("([GTAC]{%i})",line_width), "\\1\n", entry[2])),
file = filename, append = TRUE))
if (file.exists(filename)) return(TRUE)
else {
message(sprintf("Unable to create the file %s, please check that the path exists and is accessible", filename))
return(FALSE)
}
}
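# Example usage (illustrative sketch, not part of the original gist; 'seqs' is a hypothetical
# two-column data frame of ids and sequences):
# seqs <- data.frame(id = c("seq1", "seq2"), seq = c("ATGCATGCATGC", "GGGTTTAAACCC"))
# write_fasta(seqs, line_width = 60, filename = "sequences.fasta")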
# Improved implementation of xlsx::write.xlsx, built on openxlsx (allows overwriting sheets and returns the data as a tibble, so it can be used inside tidyverse pipes)
util$write_xlsx <- function(df, excel_file, sheet="Sheet1", overwritesheet=FALSE, overwritefile = TRUE,
replacefile = FALSE, asTable=TRUE, return.tibble=TRUE,...){
if (!'openxlsx' %in% row.names(installed.packages())) stop("Package 'openxlsx' is required and not available, please install it and try again")
df <- as.data.frame(df)
if (!file.exists(excel_file) | isTRUE(replacefile)) {
unlink(excel_file)
openxlsx::write.xlsx(df, excel_file, sheetName = sheet, rowNames=FALSE, asTable=asTable, ...)
if (isTRUE(return.tibble) & ('tibble' %in% row.names(installed.packages()))) df <- tibble::as_tibble(df)
return(invisible(df))
}
if (!isTRUE(overwritefile)) stop(sprintf("File %s exists and 'overwritefile' set to FALSE.", excel_file))
wb <- openxlsx::loadWorkbook(excel_file)
sheets <- openxlsx::getSheetNames(excel_file)
if (sheet %in% sheets){
if (overwritesheet==FALSE) stop(sprintf("Sheet %s exists in file and 'overwritesheet' set to FALSE, please try again with a new sheet name or with 'overwritesheet=TRUE' option", sheet))
openxlsx::removeWorksheet(wb, sheet)
}
openxlsx::addWorksheet(wb, sheet)
if (asTable==TRUE){
openxlsx::writeDataTable(wb, sheet, df)
} else {
openxlsx::writeData(wb, sheet, df)
}
openxlsx::saveWorkbook(wb, excel_file, overwrite = overwritefile)
# openxlsx::write.xlsx(df, excel_file, sheetName = sheet, row.names=FALSE, ...)
if (isTRUE(return.tibble) & ('tibble' %in% row.names(installed.packages()))) df <- tibble::as_tibble(df)
return(invisible(df))
}
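# Example usage (illustrative sketch; assumes the 'openxlsx' package is installed and the
# magrittr/dplyr pipe is available):
# mtcars %>% dplyr::filter(cyl == 6) %>%
#   write_xlsx("cars.xlsx", sheet = "six_cyl", overwritesheet = TRUE) %>%
#   dplyr::summarise(mean_mpg = mean(mpg))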
# extract hyperlinks from excel sheets (see https://stackoverflow.com/a/70676505/5346827)
util$hyperlinks_from_excel <- function(aExcelFile, ExcelSheet="all",
aRefOutputFile = NULL){
req_pkgs <- c("XML", "purrr", "dplyr", "readxl")
if (!'rlang' %in% row.names(installed.packages())) stop("'rlang' package is not installed, please install it and try again")
# if (!"rlang" %in% installed.packages() require(XML)) stop("XML package is not installed")
rlang::check_installed(req_pkgs,
reason = "Package XML is required to extract the urls from the sheets, while purrr and dplyr are needed to process the results")
# ExcelSheet="submission_info"
# aExcelFile="sample_info/AGRF_NGS_yield.xlsx"
sheets <- readxl::excel_sheets(aExcelFile)
read_relationships <- function(aSheetIndex, tmp_dir){
# tmp_dir= tmp_base_dir
# aSheetIndex = 2
filename <- file.path(tmp_dir, 'xl', 'worksheets', '_rels', paste0('sheet', aSheetIndex, '.xml.rels'))
rel <- XML::xmlParse(filename) %>% XML::xmlToList() %>% purrr::map_dfr(as.list) %>%
dplyr::filter(grepl("hyperlink$", Type)) %>%
dplyr::select(id=Id, target=Target) #%>% setNames()
if(nrow(rel) == 0){
return(NULL)
}
filename <- file.path(tmp_dir, 'xl', 'worksheets', paste0('sheet', aSheetIndex, '.xml'))
pos <- XML::xmlParse(filename) %>% XML::xmlToList()
if(is.null(pos$hyperlinks)){
return(NULL)
}
ret <- purrr::map_dfr(pos$hyperlinks, as.list) %>%
# dplyr::select(-uid) %>%
dplyr::inner_join(rel, by = 'id') %>%
dplyr::mutate(tab_idx=aSheetIndex)
link_text <- ret %>% dplyr::select(text=display, target) %>%
dplyr::filter(!is.na(text)) %>%
dplyr::distinct()
return(ret %>% dplyr::left_join(link_text))
}
EXCEL_TEMP_NAME <- 'unzipped_excel'
tmp_base_dir <- file.path(tempdir(),
paste0('tmpexcl',
as.character(round(runif(1, 1000000000000, 9999999999999)))))
dir.create(tmp_base_dir)
on.exit(unlink(tmp_base_dir))
zipfile <- file.path(tmp_base_dir, paste0(EXCEL_TEMP_NAME, '.zip'))
file.copy(from = aExcelFile, to = zipfile)
unzip(zipfile, exdir = tmp_base_dir)
sheet_idx <- NULL
# ExcelSheet=sheets[1]
if (is.numeric(ExcelSheet) & all(ExcelSheet %in% seq_along(sheets))) {
sheet_idx <- ExcelSheet
} else if (is.character(ExcelSheet) & all(ExcelSheet %in% sheets)) {
sheet_idx <- which(sheets %in% ExcelSheet)
} else if (tolower(ExcelSheet)=="all") {
sheet_idx <- seq_along(sheets)
}
if (length(sheet_idx)==0) stop("Sheets provided to 'ExcelSheet' argument not present in the Excel file")
res <- purrr::map_dfr(sheet_idx, ~read_relationships(.x, tmp_base_dir)) %>%
dplyr::mutate(sheet = sheets[tab_idx]) %>%
dplyr::mutate(ref = paste0("'", sheet, "'!", ref)) %>%
dplyr::select(id,tab_idx, sheet, ref, target, text)
if(!is.null(aRefOutputFile)){
readr::write_csv(res, aRefOutputFile)
}
return(res)
}
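# Example usage (illustrative sketch; 'links.xlsx' is a hypothetical workbook containing hyperlinks):
# link_table <- hyperlinks_from_excel("links.xlsx", ExcelSheet = "all")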
########################################
## Misc small routines
# calculates standard error
util$se <- function(x, na.rm=TRUE) {
if(na.rm) return(sqrt(var(x, na.rm=TRUE)/length(x[!is.na(x)]))) else return(sqrt(var(x)/length(x)))
}
# calculate geometric mean (see https://stackoverflow.com/a/25555105/5346827)
util$gm_mean = function(x, na.rm=TRUE){
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
util$as.c <- as.character
# split vector into chunks (see https://stackoverflow.com/questions/3318333/split-a-vector-into-chunks)
util$chunk <- function(x, n) {
  starts <- seq.int(from = 1, to = length(x), by = n)
  ends <- pmin(starts + (n - 1), length(x))
  mapply(function(a, b) x[a:b], starts, ends, SIMPLIFY = FALSE)
}
util$chunk2 <- function(x,n) split(x, cut(seq_along(x), n, labels = FALSE))
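# Example usage (illustrative sketch; assumes 'purrr' is installed):
# chunk(1:10, 3)                          # list(1:3, 4:6, 7:9, 10)
# purrr::map_int(chunk(1:10, 3), length)  # 3 3 3 1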
util$unwhich <- function(indices, len=length(indices)) {
# reverse of which(): from indices to boolean mask.
ret = rep(F,len)
ret[indices] = T
ret
}
util$nna <- function(...) !is.na(...) # i type this a lot, i think its worth 3 characters + shift key
util$shuffle <- function(...) UseMethod("shuffle")
util$shuffle.default <- function(x) x[order(runif(length(x)))]
util$shuffle.data.frame <- function(x) x[order(runif(nrow(x))),]
util$sample_df <- function(d, size=10, ...) {
samp = sample(1:nrow(d), size=size, ...)
d[samp,]
}
# split a dataframe into subsets by size
util$split_df <- function(df, group_size){
split(df, gl(ceiling(nrow(df)/group_size), group_size, nrow(df)))
}
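# Example usage (illustrative sketch):
# split_df(mtcars, group_size = 10)  # a list of 4 data frames with 10, 10, 10 and 2 rows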
util$present_levels <- function(x) intersect(levels(x), x)
util$trim_levels <- function(...) UseMethod("trim_levels")
util$trim_levels.factor <- function(x) factor(x, levels=present_levels(x))
util$trim_levels.data.frame <- function(x) {
for (n in names(x))
if (is.factor(x[,n]))
x[,n] = trim_levels(x[,n])
x
}
util$unfactor <- function(df){
id <- sapply(df, is.factor)
df[id] <- lapply(df[id], as.character)
df
}
# grep() returns indices of matches. Variants:
util$bgrep <- function(pat,x, ...) {
# "boolean" grep: return a logical vector ready for vector ops
# like & | and others
unwhich(grep(pat,x,...), length(x))
}
util$ngrep <- function(pat,x, ...)
# "normal" grep: return values, not indices
x[grep(pat,x,...)]
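# Example usage (illustrative sketch):
# fruit <- c("apple", "banana", "cherry")
# bgrep("an", fruit)  # FALSE TRUE FALSE
# ngrep("an", fruit)  # "banana"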
########################################
## Other data manipulation routines
util$tidy_transpose <- function(df) {
df %>% t() %>% # Transpose (t() works on matrices, so this returns a matrix)
as.data.frame() %>% # Coerce back to a data frame
tibble::rownames_to_column(var = "rowname") %>% # Recover the first column from the row names
janitor::row_to_names(row_number = 1) # Recover the column headers from the first row
}
util$tapply2 <- function(x, ...) {
# like tapply but preserves factors
if (is.factor(x)) {
r = factor(tapply(as.character(x), ...), levels=levels(x))
} else {
r = tapply(x, ...)
}
r
}
util$inject <- function(collection, start, fn) {
# like lisp reduce. (named after ruby)
acc = start
for (x in collection)
acc = fn(acc, x)
acc
}
##### Bioinformatics Annotation #######
# Shorten and prettify long BLAST descriptions
util$pretty_term <- function(term, prefix="Predicted:", comma=FALSE, dot=FALSE, paren=FALSE, cap=TRUE, firstCapOnly=FALSE,
spDot=TRUE, charNum=0, showTrimmed=FALSE) {
term <- sapply(term, function (s) {
s_ <- s
if (is.character(prefix) && length(prefix)>0) s_ <- sub(sprintf("^%s\\s*",prefix) ,"", s_, ignore.case = TRUE, perl=TRUE)
if (cap) s_ <- sub("(^.)", "\\U\\1", s_, perl=TRUE)
if (firstCapOnly) s_ <- paste(c(unlist(strsplit(s_, " ", fixed = TRUE))[1], sapply(unlist(strsplit(s_, " ", fixed = TRUE))[-1], function(n) sub("(^.)", "\\L\\1", n, perl=TRUE), USE.NAMES = FALSE)), collapse = " ")
if (spDot) s_ <- sub("sp\\. (.+?)\\.(.+)", "sp. \\1_\\2", s_, perl=TRUE)
if (paren) s_ <- sub(" \\(.+\\)", "", s_)
if (dot) s_ <- unlist(strsplit(s_, ".", fixed = TRUE))[1]
if (comma) s_ <- unlist(strsplit(s_, ",", fixed = TRUE))[1]
# Trim to the end of the word if character number limit is reached and remove any trailing commas or dots
if (charNum>0) s_ <- sub("[,|\\.]$", "", sub(sprintf("(^.{%s}[\\S]*)[ ]*.*", charNum), "\\1", s_, perl=TRUE), perl = TRUE)
# Add a ^ at the end if the term was trimmed anyhow
if (nchar(s_)<nchar(s) && !grepl("\\*$", s_) && showTrimmed) return(paste0(s_, "^"))
else return(s_)
}, USE.NAMES = FALSE)
return(term)
}
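# Example usage (illustrative sketch; the input string is made up):
# pretty_term("PREDICTED: serine/threonine-protein kinase, putative isoform X1",
#             comma = TRUE, charNum = 30, showTrimmed = TRUE)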
# Find unique Tax_name
util$find_uniq_tax <- function(desc, taxName, prettify=FALSE) {
if (is.na(taxName) || taxName=="N/A") uniqTax <- ifelse((is.na(desc) || !grepl(".+\\[(.*)\\]", desc)), NA, sub(".+\\[(.*)\\]","\\1", desc))
else {
if (grepl(";", taxName)) {
taxList <- unlist(strsplit(taxName, ";"))
taxDesc <- sub(".+\\[(.*)\\]","\\1", desc)
uniqTax <- ifelse(taxDesc %in% taxList, taxDesc, taxList[length(taxList)])
} else uniqTax <- taxName
}
if (prettify) uniqTax <- pretty_term(uniqTax, prefix="Undetermined", comma=FALSE, dot=FALSE, paren=FALSE, cap=TRUE, spDot=TRUE, charNum=0, showTrimmed=FALSE)
return(uniqTax)
}
# Resolve scientific name (with taxize package)
util$resolve_scientific <- function(tax_names) {
tax_count=0
resolved <- lapply(tax_names, function(x) {
tax_count <<- tax_count + 1
perc <- tax_count/length(tax_names)*100
message(sprintf("Processing %s (%.1f%% completed)", x, perc))
res_name <- try(taxize::gnr_resolve(names = x, data_source_ids=4, canonical = TRUE,best_match_only=FALSE ), TRUE)
if (!inherits(res_name, "try-error")) return(res_name$results[which.max(res_name$results$score),])
else {
message(sprintf("Warning: Could not process %s correctly, returning NAs", x))
return(data.frame(submitted_name=x,matched_name=NA,data_source_title=NA,score=NA))
}
} )
return(resolved)
}
# Analyse a single DE contrast (a pair of group levels, typically one row of a contrast matrix) from a DESeq2 dataset; note it expects an 'analysis_date' object in the calling environment
util$get_deseq2_res <- function(dds_deseq2, mat_col, group_col = "sample_group") {
contrast <- c(group_col,rev(mat_col))
comparison <- paste0(rev(mat_col), collapse="_vs_")
res <- DESeq2::results(dds_deseq2, contrast=contrast, tidy = TRUE) %>% as_tibble() %>%
dplyr::rename(gene_id=row) %>%
dplyr::mutate(contrast=comparison, analysis_date=analysis_date) %>%
dplyr::filter(!is.na(padj)) %>% dplyr::arrange(padj, desc(abs(log2FoldChange)))
return(res)
}
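# Example usage (illustrative sketch; assumes a DESeqDataSet 'dds' already run through DESeq2::DESeq(),
# an 'analysis_date' object in the session, the tidyverse loaded, and a made-up 2-column contrast matrix):
# contrast_mat <- matrix(c("treated", "control", "mutant", "control"), ncol = 2, byrow = TRUE)
# de_res_list <- apply(contrast_mat, 1, function(m) get_deseq2_res(dds, m, group_col = "condition"))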
# Modified DESeq2::plotPCA that computes the first 4 PCs and lets you choose which pair to plot
util$plotPCA4 <- function (dds_deseq2, intgroup = "condition", ntop = 500, PCs=3:4, returnData = FALSE){
rv <- genefilter::rowVars(SummarizedExperiment::assay(dds_deseq2))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop,
length(rv)))]
pca <- prcomp(t(SummarizedExperiment::assay(dds_deseq2)[select, ]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
if (!all(intgroup %in% names(SummarizedExperiment::colData(dds_deseq2)))) {
stop("the argument 'intgroup' should specify columns of colData(dds)")
}
intgroup.df <- as.data.frame(SummarizedExperiment::colData(dds_deseq2)[, intgroup,
drop = FALSE])
group <- if (length(intgroup) > 1) {
factor(apply(intgroup.df, 1, paste, collapse = " : "))
}
else {
SummarizedExperiment::colData(dds_deseq2)[[intgroup]]
}
d <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2], PC3 = pca$x[, 3], PC4 = pca$x[, 4], group = group,
intgroup.df, name = colnames(dds_deseq2))
if (returnData) {
attr(d, "percentVar") <- percentVar[1:4]
return(d)
}
ggplot2::ggplot(data = d, aes_string(x = paste0("PC", PCs[1]), y = paste0("PC", PCs[2]), color = "group")) +
ggplot2::geom_point(size = 3) +
ggplot2::labs(x = sprintf("PC%d: %.2f%% variance", PCs[1], percentVar[PCs[1]]*100),
y = sprintf("PC%d: %.2f%% variance", PCs[2], percentVar[PCs[2]]*100)) + ggplot2::coord_fixed()
# + ylab(paste0("PC4: ", round(percentVar[2] * 100), "% variance"))
}
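# Example usage (illustrative sketch; assumes 'vsd' is a variance-stabilised DESeqTransform object):
# plotPCA4(vsd, intgroup = "condition", PCs = 3:4)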
########################################
## Printing, viewing
## see also: str()
## Transparent colors
## Mark Gardener 2015
## www.dataanalytics.org.uk
util$t_col <- function(color, percent = 50, name = NULL) {
# color = color name, percent = % transparency, name = an optional name for the color
## Get RGB values for named color
rgb.val <- col2rgb(color)
## Make new color using input color as base and alpha set by transparency
t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
max = 255,
alpha = (100-percent)*255/100,
names = name)
## Save the color
invisible(t.col)
}
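# Example usage (illustrative sketch):
# semi_red <- t_col("red", percent = 70)
# plot(1:10, col = semi_red, pch = 19, cex = 3)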
util$printf <- function(...) cat(sprintf(...))
util$listprint <- function(x) {
s = paste(sapply(names(x), function(n) sprintf("%s=%s", n,x[[n]])), collapse=' ')
printf("%s\n", s)
}
util$msg <- function(...) cat(..., "\n", file=stderr())
util$LogMsg <- function(text, dateformat="%d-%m-%Y %I:%M:%S", ...) {
if (is.null(dateformat) || isFALSE(dateformat)) return(cat(paste0(text, "\n")))
msg <- sprintf(paste0(format(Sys.time(), dateformat), ": ", text, "\n"), ...)
cat(msg)
}
util$h = utils::head
util$ppy <- function(x, column.major=FALSE, ...) {
# pretty-print as yaml. intended for rows with big textual cells.
# a la mysql's \G operator
library(yaml)
cat(as.yaml(x, column.major=column.major), ...)
cat("\n", ...)
}
util$table_html = function(...) {
# Intended for inside dosink()
columns = list(...)
ncol = length(columns)
nrow = length(columns[[1]])
# assume columns are in parallel
printf("\n<table cellpadding=3 border=1 cellspacing=0 bordercolor=gray>")
for (i in 1:nrow) {
printf("\n<tr>")
for (j in 1:ncol)
printf("\n <td>%s", columns[[j]][i])
}
printf("\n</table>\n")
}
########################################
## Workspace management
# Fix devtools
util$.fixdevtools <- function() {
dev_reqs <- c("httr", "devtools")
install.deps(dev_reqs)
httr::set_config( httr::config( ssl_verifypeer = 0L ) )
options(buildtools.check = function(action) TRUE )
}
# Silently load a package, can change mode to "require"
util$silent.load <- function(a.package, mode="library"){
eval(parse(text=sprintf("suppressWarnings(suppressPackageStartupMessages(
%s(a.package, character.only=TRUE)))", mode)))
}
# install and load packages from CRAN, Bioconductor or git, see usage below
util$install.deps <- function(p, repo="cran", reposite=getOption("repos"), silent=FALSE, force=FALSE){
# repos=getOption("repos")
if (is.null(reposite)) reposite <- "http://cloud.r-project.org/"
call_install <- switch(repo,
cran=sprintf("install.packages(package, repos=\"%s\"", reposite),
bioc="biocLite(package, suppressUpdates=TRUE, ask=FALSE",
git="install_github(package")
if (repo=="git") .fixdevtools()
if (repo=="bioc" & !all(p %in% utils::installed.packages())) {
#source("http://bioconductor.org/biocLite.R")
if (!"RCurl" %in% utils::installed.packages()) {
install.packages("RCurl", repos=reposite)
}
eval(parse(text = RCurl::getURL("http://bioconductor.org/biocLite.R",
ssl.verifypeer=FALSE)))
biocLite(suppressUpdates=TRUE, ask=FALSE)
}
for (package in p) {
gitNames <- unlist(strsplit(package, "/"))
pName <- gitNames[length(gitNames)]
if (!pName %in% utils::installed.packages() | isTRUE(force)) {
cat(sprintf("Please wait, installing and loading missing package %s and its dependencies from %s.\n",
package, repo), file=stderr())
suppressWarnings(eval(parse(text = sprintf("%s, quiet = TRUE, lib=.libPaths()[1])",call_install))))
if (!pName %in% utils::installed.packages()) {
ifelse(!interactive(),q("no", 2, TRUE),
stop(sprintf("Unable to install missing package %s from %s.\n",
package, repo), call. = FALSE))
}
}
if (isTRUE(silent)) silent.load(pName, mode="require")
else require(pName, character.only=T, quietly=TRUE, warn.conflicts=TRUE)
}
}
# CRAN_packages <- c("RCurl", "dplyr", "tidyr", "ggplot2", "RColorBrewer", "RSQLite")
# install.deps(CRAN_packages)
# improved list of objects
# http://stackoverflow.com/questions/1358003/tricks-to-manage-the-available-memory-in-an-r-session
util$list_objects = function (pos = 1, pattern) {
napply = function(names, fn) sapply(names, function(x)
fn(get(x, pos = pos)))
names = ls(pos = pos, pattern = pattern)
N = length(names)
obj_class = napply(names, function(x) as.character(class(x))[1])
obj_mode = napply(names, mode)
obj_type = ifelse(is.na(obj_class), obj_mode, obj_class)
obj_prettysize = napply(names, function(x) {
capture.output(print(object.size(x), units = "auto")) })
obj_size = napply(names, object.size)
obj_prettysize[obj_size < 1e6] = ""
obj_length = napply(names, function(x) length(x))
obj_dim = t(napply(names, function(x)
as.numeric(dim(x))[1:2]))
is_flat = is.na(obj_dim)[, 1]
is_vector = napply(names, function(x) is.vector(x) & class(x) != 'list')
info_width = max(20, options('width')$width - 60)
small_str = function(x) {
out = capture.output(
str(x, max.level=0, give.attr=F, give.head=F, width=info_width, strict.width='cut')
)
out = str_c(out,collapse=' ')
out = cutoff(str_replace(out,"\n"," "))
if (str_detect(out, "^List of"))
out = str_c("[Names] $ ", str_c(names(x),collapse=' '))
cutoff(out)
}
cutoff = function(s) {
if (str_length(s) >= info_width) {
str_c(str_sub(s,1,info_width-2),'..')
} else {
s
}
}
pad = function(s) sprintf(" %s", s)
out <- data.frame(
Type = obj_type,
Size = obj_prettysize,
Dim = ifelse(is_vector | is_flat, obj_length,
sprintf("(%s, %s)", obj_dim[,1], obj_dim[,2])),
Value = napply(names, function(x)
if (class(x) %in% c('data.frame','list') && !is.null(names(x)))
cutoff(str_c("[Names] $ ",str_c(names(x), collapse=' ')))
else small_str(x)
),
stringsAsFactors=F)
row.names(out) = names
out$Dim = sprintf(" %s", out$Dim)
out$Value = sprintf(str_c(" %-", info_width, "s"), out$Value)
out = rbind(subset(out, Type!='function'), subset(out, Type=='function'))
out
}
util$lsos = function() {
d = list_objects()
d$name = row.names(d)
d = subset(d, name != 'util')
row.names(d)=d$name
d$name=NULL
d
}
########################################
## For performance optimization and long-running jobs
util$timeit <- function(expr, name=NULL) {
# print how long the expression takes, and return its value too.
# So you can interpose timeit({ blabla }) around any chunk of code "blabla".
start = Sys.time()
ret = eval(expr)
finish = Sys.time()
if (!is.null(name)) cat(name,": ")
print(finish-start)
invisible(ret)
}
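# Example usage (illustrative sketch):
# x <- timeit({ sum(rnorm(1e6)) }, name = "random sum")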
util$dotprogress <- function(callback, interval=10) {
# intended to wrap the anonymous callback for sapply() or somesuch.
# ALTERNATIVE: plyr *ply(.progress='text')
count = 0
return(function(...) {
if ((count <<- count+1) %% interval == 0)
cat(".")
callback(...)
})
}
########################################
## External programs for interactivity
########################################
## Graphics output wrappers
## For easy one-liners, like:
## dopdf("tmp.pdf",width=5,height=5,cmd=plot(x,y))
util$dopdf <- function(filename,..., cmd) {
pdf(filename, ...)
eval(cmd)
dev.off()
if (exists('OPEN') && OPEN)
system(sprintf("open %s", filename))
}
util$dopng <- function(filename,..., cmd) {
png(filename, ...)
eval(cmd)
dev.off()
if ((exists('OPEN') && OPEN))
system(sprintf("open %s", filename))
}
util$dosink <- function(filename,cmd, open=NULL) {
# like capture.output() but follows open/OPEN conventions here
sink(filename)
eval(cmd)
sink(NULL)
if (prio_check(open, exists('OPEN') && OPEN))
system(sprintf("open %s", filename))
}
util$dosvg <- function(filename, ..., cmd, open=NULL) {
require("RSvgDevice")
devSVG(filename, ...)
eval(cmd)
dev.off()
if (prio_check(open, exists('OPEN') && OPEN))
system(sprintf("open %s", filename))
}
# A default plotting theme
util$plot_theme <- function(def_theme="grey", baseSize=22, strip_fill="lightskyblue"){
require("ggplot2")
my_theme <- eval(parse(text = sprintf("theme_%s(base_size=%d)", def_theme,baseSize))) +
theme(axis.title.y=element_text(face="bold", vjust = 1.5, size=rel(0.8)),
axis.title.x=element_text(face="bold", vjust = 0.1, size=rel(0.8)),
legend.title=element_text(size=rel(0.85), hjust=0.5),
legend.text=element_text(size = rel(0.75),lineheight = 1.5),
axis.text.y=element_text(size=rel(0.85)),
axis.text.x=element_text(size=rel(0.85)),
#panel.grid.minor=element_blank(),
strip.text=element_text(size=rel(0.75)),
strip.background=element_rect(fill = strip_fill))
return(my_theme)
}
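# Example usage (illustrative sketch; assumes 'ggplot2' is loaded):
# ggplot(mtcars, aes(wt, mpg)) + geom_point() + plot_theme("bw", baseSize = 18)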
# plot correlation matrix (https://statsandr.com/blog/correlation-coefficient-and-correlation-test-in-r/)
# do not edit
util$corrplot2 <- function(data,
method = "pearson",
sig.level = 0.05,
order = "original",
diag = FALSE,
type = "upper",
tl.srt = 90,
number.font = 1,
number.cex = 1,
mar = c(0, 0, 0, 0)) {
require("corrplot")
data_incomplete <- data
data <- data[complete.cases(data), ]
mat <- cor(data, method = method)
cor.mtest <- function(mat, method) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat <- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], method = method)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
p.mat <- cor.mtest(data, method = method)
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(mat,
method = "color", col = col(200), number.font = number.font,
mar = mar, number.cex = number.cex,
type = type, order = order,
addCoef.col = "black", # add correlation coefficient
tl.col = "black", tl.srt = tl.srt, # rotation of text labels
# combine with significance level
p.mat = p.mat, sig.level = sig.level, insig = "blank",
# hide correlation coefficients on the diagonal
diag = diag
)
}
# A pretty ggplot theme adapted from the Coding Club data-viz tutorial (https://ourcodingclub.github.io/tutorials/dataviz-beautification-synthesis/#distributions)
# Unlike the original niwot_theme(), this version takes an optional base font size (baseSize)
util$theme_niwot <- function(baseSize=16){
theme_bw(base_size=baseSize) +
theme(text = element_text(family = "Helvetica Light"),
axis.text = element_text(size = rel(0.85)),
# axis.title = element_text(size = 18),
axis.line.x = element_line(color="black"),
axis.line.y = element_line(color="black"),
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = unit(c(1, 1, 1, 1), units = "cm"),
plot.title = element_text(vjust = 1, hjust = 0),
legend.text = element_text(size = rel(0.85)),
legend.title = element_blank(),
legend.position = c(0.95, 0.15),
legend.key = element_blank(),
legend.background = element_rect(color = "black",
fill = "transparent",
size = 2, linetype = "blank"))
}
# Create a filename with folder and current date/time before the extension
util$filedate <- function(filename, ext="pdf", outdir=".", dateformat="%d_%m_%Y"){
if (outdir!="." & !file.exists(outdir)) dir.create(outdir, recursive = TRUE)
ext <- sub("\\.*(.+)", "\\1", ext)
if (is.null(dateformat)) return(file.path(outdir, sprintf("%s.%s", filename, ext)))
if (dateformat==FALSE) return(file.path(outdir, sprintf("%s.%s", filename, ext)))
return(file.path(outdir, sprintf("%s_%s.%s", filename, format(Sys.time(), dateformat), ext)))
}
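# Example usage (illustrative sketch; creates the 'plots' folder if needed):
# pdf(filedate("pca_plot", ext = "pdf", outdir = "plots"))  # opens plots/pca_plot_<date>.pdf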
########################################
## Plotting routines
# Create nice log axis breaks (see package "scales")
util$log_breaks <- function(n = 10, minor=TRUE, major=TRUE, pretty=FALSE){
function(x) {
b <- axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n)
if (!minor) b <- b[grepl("1", prettyNum(b))]
if (!major) b <- b[grepl("5", prettyNum(b))]
if (pretty) b <- prettyNum(b)
return(b)
}
}
# Create log axis breaks (all ticks, no thinning)
util$base_breaks <- function(n = 10){
function(x) {
axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n)
}
}
# A function factory for getting integer y-axis values.
util$integer_breaks <- function(n = 5, ...) {
fxn <- function(x) {
breaks <- floor(pretty(x, n, ...))
names(breaks) <- attr(breaks, "labels")
breaks
}
return(fxn)
}
util$linelight <- function(x,y, lty='dashed', col='lightgray', ...) {
# highlight a point with lines running to the axes.
left = par('usr')[1]
bot = par('usr')[3]
segments(left,y, x,y, lty=lty, col=col, ...)
segments(x,bot, x,y, lty=lty, col=col, ...)
}
util$hintonplot <- function(mat, max_value=max(abs(mat)), mid_value=0, ...) {
# Plots a matrix as colored, size-varying boxes
# I dunno who started calling this a "Hinton plot", but anyways
# Example:
# hintonplot(matrix(rnorm(100),10))
# Example, for counts:
# table(cyl=mtcars$cyl, mpg=cut(mtcars$mpg,3))
# mpg
# cyl (10.4,18.2] (18.2,26.1] (26.1,33.9]
# 4 0 6 5
# 6 2 5 0
# 8 12 2 0
# hintonplot(table(cyl=mtcars$cyl, mpg=cut(mtcars$mpg,3)))
plot.new()
plot.window(xlim=c(0.5,ncol(mat)+0.5), ylim=c(0.5,nrow(mat)+0.5))
x_mid = 1:ncol(mat)
y_mid = 1:nrow(mat)
area = abs(mat) / max_value
side = sqrt(area)
for (x in 1:ncol(mat)) {
for (y in nrow(mat):1) {
# ym = (nrow(mat):1)[y]
ym = y
d = side[ym,x] / 2
rect(x-d, y-d, x+d, y+d, col=if (mat[ym,x]>0) 'darkblue' else 'darkred')
}
}
axis(1, 1:ncol(mat), labels=colnames(mat))
# axis(2, nrow(mat):1, labels=row.names(mat))
axis(2, 1:nrow(mat), labels=row.names(mat))
title(xlab=names(dimnames(mat))[2], ylab=names(dimnames(mat))[1], ...)
}
########################################
## Rmarkdown functions
# make captions for DT tables
util$DT_caption <- function(ref, caption, tab_width="100%"){
# Do not use underscores or spaces in references!
if (grepl("[_ ]", ref)) stop("Do not use underscores (_) or spaces ( ) in figure or table reference!")
cat(sprintf("<table width=%s>",tab_width) ,
sprintf("<caption>(#tab:%s)%s</caption>", ref, caption),"</table>",
sep ="\n")
}
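# Example usage (illustrative sketch; intended for an R Markdown chunk with results='asis',
# followed by printing a DT::datatable(); the reference uses bookdown-style (#tab:...) labels):
# DT_caption("diff-genes", "Differentially expressed genes in each contrast.")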
########################################
## Has to be last in file
while("util" %in% search())
detach("util")
attach(util)
IdoBar commented Jun 27, 2018

Fixed filedate() to recursively create folder names

IdoBar commented Jun 4, 2019

Removed the dependency on the xlsx package (which requires Java) in favour of openxlsx in write_xlsx()

IdoBar commented Nov 23, 2019

Improved functionality of write_xlsx() (check dependencies, optional conversion to tibble)

IdoBar commented May 29, 2022

Added Correlation plot function corrplot2() (see source)

IdoBar commented Apr 27, 2023

Added split_df() to split a dataframe into equal-sized subsets (returns a list of dataframes).
See source on SO

IdoBar commented Sep 1, 2023

Added chunk() to split a vector into chunks (very useful for working with purrr).
See source on SO
