Skip to content

Instantly share code, notes, and snippets.

@leeper
Last active August 29, 2015 14:01
Show Gist options
  • Save leeper/8d4478b71011ed08db92 to your computer and use it in GitHub Desktop.
Save leeper/8d4478b71011ed08db92 to your computer and use it in GitHub Desktop.
Outline of a more convenient i/o function for R
# call package easyio
library('foreign')
library('tools')
# library('haven') # for spss (sav, por), stata (dta), sas
#library('memisc') # for `spss.portable.file` and `spss.system.file`
#library('hmisc') # for `spss.get`
#library('openxlsx') # for `read.xlsx`
#library('data.table') # for fread
# matlab i/o: http://cran.r-project.org/web/packages/R.matlab/index.html
# netcdf
# hdf
# maybe: http://cran.r-project.org/web/packages/ncdf4/ncdf4.pdf
# images raster, tif, jpeg
# source a .R file?
# zip, tar, gz, etc.
dataframeToXML <- function(x){
if(!is.list(x))
stop("'x' must be a data.frame or list of data.frames")
if(is.data.frame(x)){
out <- newXMLNode('data.frame')
addAttributes(out, name=as.character(substitute(x)))
for(i in 1:nrow(x)){
thisrow <- newXMLNode('row')
addAttributes(thisrow, number=i)
tmp <- mapply(function(a,b)
newXMLNode(b,a), x[i,], names(x)
)
addChildren(thisrow, tmp)
addChildren(out, thisrow)
}
return(out)
} else {
# something to parse a list to XML
out <- newXMLNode('list')
addChildren(out, lapply(x, dataframeToXML))
}
}
readwrite <- function(in, out=NULL, in_fun=NULL, out_fun=NULL, in_opts=NULL, out_opts = NULL, verbose = FALSE, ...){
if(!is.data.frame(in)){
# check in-file size to determine whether to use
info <- file.info(in)
if(info$isdir)
stop("Value specified for 'in' is a directory, not a file.")
# info$size # use this to suggest `data.table::fread` on large files
# determine in-file type from name
if(is.null(in_fun)){
if(in=='clipboard') {
if(.Platform$OS.type=='windows'){
in <- readClipboard()
# needs to have a format argument: 1 = text; 2 = DIF
# need in_fun to parse character string
}
# else unix: file("X11_clipboard")
# else OSX: pipe("pbpaste")
if(is.null(in_fun))
stop("Must specify 'in_fun' for reading from clipboard.")
} else {
ext <- file_ext(in)
infuns <- list( 'csv' = read.csv,
'tsv' = read.delim,
'txt' = read.fwf,
'Rdata' = load,
'rds' = readRDS,
'dif' = read.dif, # from utils
'dta' = read.dta,
'arff' = read.arff,
'dbf' = read.dbf,
'rec' = read.epiinfo,
'mtp' = read.mtp,
'mat' = read.octave,
'sav' = read.spss, # but maybe something else
'por' = spss.portable.file, # but maybe something else???
'syd' = read.systat,
'xpt' = read.xport,
'xlsx' = read.xlsx, # from `openxlsx`
'ods' = read.gnumeric.sheet, # library('gnumeric')
'ods' = read.ods # from `ROpenOffice` on OmegaHat
'json' = fromJSON # from RJSONIO
)
if(ext %in% names(infuns))
in_fun <- switch(ext, infuns)
else
in_fun <- readLines
}
}
}
# determine out-file type from name
if(!is.null(out) && is.null(out_fun)){
if(in=='clipboard') {
stop("If ")
if(.Platform$OS.type=='windows') {
out_fun <- writeClipboard
# transform `in` to a character string representation
# writeClipboard needs to have a format argument: 1 = text; 2 = DIF
}
# else unix: ???
# else OSX: pipe("pbcopy", "w")
if(is.null(out_fun))
stop("Must specify 'out_fun' for reading from clipboard.")
} else {
ext <- file_ext(out)
outfuns <- list('csv' = write.csv,
'tsv' = write.delim,
'Rdata' = save,
'rds' = writeRDS,
'arff' = write.arff,
'dbf' = write.dbf,
'dta' = write.dta,
'xpt' = write.xport,
'json' = toJSON, # RJSONIO?
#'xml' = # something to build an XML tree
)
if(ext %in% names(outfuns))
out_fun <- switch(ext, outfuns)
else
out_fun <- write.table
}
}
# read file in
if(is.data.frame(in))
obj <- in
else
obj <- do.call(in_fun, c(file=in, in_opts))
# write file out
if(!is.null(out))
do.call(out_fun, c(file=out, in_opts))
else
return(obj)
}
.RdArgs <- function(fun){
# parse an Rd file and extract details of function arguments to display via `readhelp` and `writehelp`
}
readhelp <- function(file, ...) {
# use filename to determine default behavior of readwrite
# display information to the user about the default function by parsing the Rd file
}
writehelp <- function(file, ...) {
# use filename to determine default behavior of readwrite
# display information to the user about the default function by parsing the Rd file
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment