Created
February 2, 2015 21:52
-
-
Save anhqle/88cfb9e3c6cfc7da9bdb to your computer and use it in GitHub Desktop.
A bunch of convenience R functions I made for myself
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install and load packages | |
f_install_and_load <- function(packs) { | |
new.packs <- packs[!(packs %in% installed.packages()[ ,"Package"])] | |
lapply(new.packs, install.packages, repos="http://cran.rstudio.com/", dependencies=TRUE) | |
lapply(packs, library, character.only=TRUE) | |
} | |
# Scale and center a numeric vector | |
f_center_and_scale <- function(vector, num.sd = 2) { | |
# num.sd is how many sd to divide by | |
(vector - mean(vector, na.rm=T)) / (num.sd * sd(vector, na.rm=T)) | |
} | |
# Create an environment that contains the stata var labels | |
f_stata_to_env <- function(df) { | |
lab_env <- new.env() | |
for (i in seq_along(names(df))) { | |
lab_env[[names(df)[i]]] <- attr(df, "var.labels")[i] | |
} | |
return(lab_env) | |
} | |
f_stata_to_df <- function(df) { | |
lab_df <- data.frame(var.name = names(df), | |
var.label = attr(df, "var.labels")) | |
return(lab_df) | |
} | |
# Split countryyear | |
f_splitcountryyear <- function(df) { | |
library(Hmisc) | |
# First remove Unicode | |
ascii_country <- iconv(df$country, "latin1", "ASCII", sub="") | |
# Split the countryyear into country and year | |
country_year <- strsplit(ascii_country, "(?<=[a-zA-Z])(?=[0-9])", perl=TRUE) | |
# Create new variables year2 and country2 according to the split | |
df$year <- sapply(country_year, function(x) as.numeric(x[2])) | |
df$country <- sapply(country_year, function(x) paste0(x[1:(length(x)-1)])) | |
df$country <- capitalize(df$country) | |
return(df) | |
} | |
# Function to clean name to fit with countrycode country names | |
f_cleancname <- function(ss) { | |
require(plyr) | |
revalue(ss, | |
c("Antiguaandbarbuda"="Antigua and Barbuda", | |
"BiH"="Bosnia and Herzegovina", | |
"Bolivia"="Bolivia, Plurinational State of", | |
"Burkinafaso"="Burkina Faso", | |
"BurkinaFaso"="Burkina Faso", | |
"Capeverde"="Cabo Verde", | |
"CapeVerde"="Cabo Verde", | |
"Cape Verde"="Cabo Verde", | |
"Centralafricanrepublic"="Central African Republic", | |
"Congo, The Democratic Republic Of"="Congo, the Democratic Republic of the", | |
"Costarica"="Costa Rica", | |
"CostaRica"="Costa Rica", | |
"Cte d'Ivoire"="Cote d'Ivoire", | |
"Czech"="Czech Republic", | |
"DominicanRepublic"="Dominican Republic", | |
"Drc"="Congo, the Democratic Republic of the", | |
"DRC"="Congo, the Democratic Republic of the", | |
"ElSalvador"="El Salvador", | |
"Elsalvador"="El Salvador", | |
"Fyr Macedonia"="Macedonia, the former Yugoslav Republic of", | |
"FYROM"="Macedonia, the former Yugoslav Republic of", | |
"GuineaBissau"="Guinea-Bissau", | |
"Kosovo"="Kosovo", # FLAG FLAG | |
"Kyrgyz Republic"="Kyrgyzstan", | |
"Laos"="Lao People's Democratic Republic", | |
"LaoPDR"="Lao People's Democratic Republic", | |
"Moldova"="Moldova, Republic of", | |
"Micronesia"="Micronesia, Federated States of", | |
"Montenegro"="Yugoslavia", | |
"Russia"="Russian Federation", | |
"StKittsandNevis"="Saint Kitts and Nevis", | |
"StLucia"="Saint Lucia", | |
"StVincentandGrenadines"="Saint Vincent and the Grenadines", | |
"Serbia"="Yugoslavia", | |
"Serbia&Montenegro"="Yugoslavia", | |
"Slovak Republic"="Slovakia", | |
"SouthAfrica"="South Africa", | |
"SouthKorea"="Korea, Republic of", | |
"SriLanka"="Sri Lanka", | |
"Syria"="Syrian Arab Republic", | |
"Tanzania"="Tanzania, United Republic of", | |
"Timor Leste"="Timor-Leste", | |
"TrinidadandTobago"="Trinidad and Tobago", | |
"Venezuela"="Venezuela, Bolivarian Republic of", | |
"Vietnam-b"="Viet Nam", | |
"Vietnam"="Viet Nam"), warn_missing=TRUE) | |
} | |
# ---- Balance table ---- | |
# Get level names from variables (for balance table) | |
f_getlevelnames <- function(df, vars) { | |
unlist(sapply(vars, function(var) paste(var, levels(df[ , var])[-1], sep="_"))) | |
} | |
# Add significance level star to table | |
f_addstar <- function(df, pvaluevar="p.value") { | |
pval <- df[ , pvaluevar] | |
if (is.null(pval)) { | |
stop("Variable not found.") | |
} else { | |
stars <- ifelse(pval <= .001, "***", | |
ifelse(pval <= .05, "**", | |
ifelse(pval <= .1, "*", ""))) | |
} | |
return(cbind.data.frame(df, stars)) | |
} | |
# Create a balance table based on MatchBalance result | |
f_create_balancetable <- function(df, balance_vars, bal_result) { | |
f_addstar( | |
cbind.data.frame( | |
f_getlevelnames(df, balance_vars), | |
ldply(bal_result, function(x) | |
data.frame(mean.Tr=x$mean.Tr, mean.Co=x$mean.Co, p.value=x$p.value)) | |
) | |
) | |
} | |
# Re-order columns | |
# http://stackoverflow.com/questions/18339370/reordering-columns-in-a-large-dataframe | |
moveMe <- function(data, tomove, where = "last", ba = NULL) { | |
temp <- setdiff(names(data), tomove) | |
x <- switch( | |
where, | |
first = data[c(tomove, temp)], | |
last = data[c(temp, tomove)], | |
before = { | |
if (is.null(ba)) stop("must specify ba column") | |
if (length(ba) > 1) stop("ba must be a single character string") | |
data[append(temp, values = tomove, after = (match(ba, temp)-1))] | |
}, | |
after = { | |
if (is.null(ba)) stop("must specify ba column") | |
if (length(ba) > 1) stop("ba must be a single character string") | |
data[append(temp, values = tomove, after = (match(ba, temp)))] | |
}) | |
x | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment