Skip to content

Instantly share code, notes, and snippets.

@jwinternheimer
Created June 4, 2015 12:46
Show Gist options
  • Save jwinternheimer/69c8124b03550ecc31ce to your computer and use it in GitHub Desktop.
Save jwinternheimer/69c8124b03550ecc31ce to your computer and use it in GitHub Desktop.
diversity-app
library(data.table);
library(dplyr);
library(tidyr);
cleanGoogleTable <- function(dat, table=1, skip=0, ncols=NA, nrows=-1, header=TRUE, dropFirstCol=NA){
if(!is.data.frame(dat)){
dat <- dat[[table]]
}
if(is.na(dropFirstCol)) {
firstCol <- na.omit(dat[[1]])
if(all(firstCol == ".") || all(firstCol== as.character(seq_along(firstCol)))) {
dat <- dat[, -1]
}
} else if(dropFirstCol) {
dat <- dat[, -1]
}
if(skip > 0){
dat <- dat[-seq_len(skip), ]
}
if(nrow(dat) == 1) return(dat)
if(nrow(dat) >= 2){
if(all(is.na(dat[2, ]))) dat <- dat[-2, ]
}
if(header && nrow(dat) > 1){
header <- as.character(dat[1, ])
names(dat) <- header
dat <- dat[-1, ]
}
# Keep only desired columns
if(!is.na(ncols)){
ncols <- min(ncols, ncol(dat))
dat <- dat[, seq_len(ncols)]
}
# Keep only desired rows
if(nrows > 0){
nrows <- min(nrows, nrow(dat))
dat <- dat[seq_len(nrows), ]
}
# Rename rows
rownames(dat) <- seq_len(nrow(dat))
dat
}
readGoogleSheet <- function(url, name, na.string="", header=TRUE){
day <-format(Sys.time(), "%Y-%m-%d")
filename <- paste0('/data/',name,day,'.csv')
if (!file.exists(filename))
download(url, destfile=filename)
# Suppress warnings because Google docs seems to have incomplete final line
suppressWarnings({
doc <- paste(readLines(filename), collapse=" ")
})
if(nchar(doc) == 0) stop("No content found")
htmlTable <- gsub("^.*?(<table.*</table).*$", "\\1>", doc)
ret <- readHTMLTable(htmlTable, header=header, stringsAsFactors=FALSE, as.data.frame=TRUE)
raw <- lapply(ret, function(x){ x[ x == na.string] <- NA; x})
cleanGoogleTable(raw, table=1)
}
cleanUpData <- function(data) {
## IMPORT AND TIDY DATA
names(data) <- c("date","gender","ethnicity","continent","age","department")
ethnicities <- c("Asian","Caucasian","Hispanic/Latino","Indian","Black","Mixed Race")
data$ethnicity <- gsub("^Caucasian.*$", replacement = "Caucasian",data$ethnicity,ignore.case=T)
data$ethnicity <- gsub("^White.*$", replacement = "Caucasian",data$ethnicity,ignore.case=T)
data$ethnicity <- gsub("Southeast Asian","Asian",data$ethnicity)
data$ethnicity <- gsub("Chinese","Asian",data$ethnicity)
data$ethnicity <- gsub("Taiwanese","Asian",data$ethnicity)
data$ethnicity <- gsub("Hispanic/Caucasian","Mixed Race",data$ethnicity)
data$ethnicity <- ifelse(data$ethnicity %in% ethnicities, data$ethnicity,"Other")
data
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment