Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Read Google Spreadsheet into R
# This set of functions comes entirely form Andrie on stackoverflow
# http://stackoverflow.com/questions/22873602/importing-data-into-r-from-google-spreadsheet
readGoogleSheet <- function(url, na.string="", header=TRUE){
stopifnot(require(XML))
# Suppress warnings because Google docs seems to have incomplete final line
suppressWarnings({
doc <- paste(readLines(url), collapse=" ")
})
if(nchar(doc) == 0) stop("No content found")
htmlTable <- gsub("^.*?(<table.*</table).*$", "\\1>", doc)
ret <- readHTMLTable(htmlTable, header=header, stringsAsFactors=FALSE, as.data.frame=TRUE)
lapply(ret, function(x){ x[ x == na.string] <- NA; x})
}
cleanGoogleTable <- function(dat, table=1, skip=0, ncols=NA, nrows=-1, header=TRUE, dropFirstCol=NA){
if(!is.data.frame(dat)){
dat <- dat[[table]]
}
if(is.na(dropFirstCol)) {
firstCol <- na.omit(dat[[1]])
if(all(firstCol == ".") || all(firstCol== as.character(seq_along(firstCol)))) {
dat <- dat[, -1]
}
} else if(dropFirstCol) {
dat <- dat[, -1]
}
if(skip > 0){
dat <- dat[-seq_len(skip), ]
}
if(nrow(dat) == 1) return(dat)
if(nrow(dat) >= 2){
if(all(is.na(dat[2, ]))) dat <- dat[-2, ]
}
if(header && nrow(dat) > 1){
header <- as.character(dat[1, ])
names(dat) <- header
dat <- dat[-1, ]
}
# Keep only desired columns
if(!is.na(ncols)){
ncols <- min(ncols, ncol(dat))
dat <- dat[, seq_len(ncols)]
}
# Keep only desired rows
if(nrows > 0){
nrows <- min(nrows, nrow(dat))
dat <- dat[seq_len(nrows), ]
}
# Rename rows
rownames(dat) <- seq_len(nrow(dat))
dat
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.