Skip to content

Instantly share code, notes, and snippets.

@anhqle
Created February 2, 2015 21:52
Show Gist options
  • Save anhqle/88cfb9e3c6cfc7da9bdb to your computer and use it in GitHub Desktop.
Save anhqle/88cfb9e3c6cfc7da9bdb to your computer and use it in GitHub Desktop.
A bunch of convenience R functions I made for myself
# Install and load packages
f_install_and_load <- function(packs) {
new.packs <- packs[!(packs %in% installed.packages()[ ,"Package"])]
lapply(new.packs, install.packages, repos="http://cran.rstudio.com/", dependencies=TRUE)
lapply(packs, library, character.only=TRUE)
}
# Scale and center a numeric vector
f_center_and_scale <- function(vector, num.sd = 2) {
# num.sd is how many sd to divide by
(vector - mean(vector, na.rm=T)) / (num.sd * sd(vector, na.rm=T))
}
# Create an environment that contains the stata var labels
f_stata_to_env <- function(df) {
lab_env <- new.env()
for (i in seq_along(names(df))) {
lab_env[[names(df)[i]]] <- attr(df, "var.labels")[i]
}
return(lab_env)
}
f_stata_to_df <- function(df) {
lab_df <- data.frame(var.name = names(df),
var.label = attr(df, "var.labels"))
return(lab_df)
}
# Split countryyear
f_splitcountryyear <- function(df) {
library(Hmisc)
# First remove Unicode
ascii_country <- iconv(df$country, "latin1", "ASCII", sub="")
# Split the countryyear into country and year
country_year <- strsplit(ascii_country, "(?<=[a-zA-Z])(?=[0-9])", perl=TRUE)
# Create new variables year2 and country2 according to the split
df$year <- sapply(country_year, function(x) as.numeric(x[2]))
df$country <- sapply(country_year, function(x) paste0(x[1:(length(x)-1)]))
df$country <- capitalize(df$country)
return(df)
}
# Function to clean name to fit with countrycode country names
f_cleancname <- function(ss) {
require(plyr)
revalue(ss,
c("Antiguaandbarbuda"="Antigua and Barbuda",
"BiH"="Bosnia and Herzegovina",
"Bolivia"="Bolivia, Plurinational State of",
"Burkinafaso"="Burkina Faso",
"BurkinaFaso"="Burkina Faso",
"Capeverde"="Cabo Verde",
"CapeVerde"="Cabo Verde",
"Cape Verde"="Cabo Verde",
"Centralafricanrepublic"="Central African Republic",
"Congo, The Democratic Republic Of"="Congo, the Democratic Republic of the",
"Costarica"="Costa Rica",
"CostaRica"="Costa Rica",
"Cte d'Ivoire"="Cote d'Ivoire",
"Czech"="Czech Republic",
"DominicanRepublic"="Dominican Republic",
"Drc"="Congo, the Democratic Republic of the",
"DRC"="Congo, the Democratic Republic of the",
"ElSalvador"="El Salvador",
"Elsalvador"="El Salvador",
"Fyr Macedonia"="Macedonia, the former Yugoslav Republic of",
"FYROM"="Macedonia, the former Yugoslav Republic of",
"GuineaBissau"="Guinea-Bissau",
"Kosovo"="Kosovo", # FLAG FLAG
"Kyrgyz Republic"="Kyrgyzstan",
"Laos"="Lao People's Democratic Republic",
"LaoPDR"="Lao People's Democratic Republic",
"Moldova"="Moldova, Republic of",
"Micronesia"="Micronesia, Federated States of",
"Montenegro"="Yugoslavia",
"Russia"="Russian Federation",
"StKittsandNevis"="Saint Kitts and Nevis",
"StLucia"="Saint Lucia",
"StVincentandGrenadines"="Saint Vincent and the Grenadines",
"Serbia"="Yugoslavia",
"Serbia&Montenegro"="Yugoslavia",
"Slovak Republic"="Slovakia",
"SouthAfrica"="South Africa",
"SouthKorea"="Korea, Republic of",
"SriLanka"="Sri Lanka",
"Syria"="Syrian Arab Republic",
"Tanzania"="Tanzania, United Republic of",
"Timor Leste"="Timor-Leste",
"TrinidadandTobago"="Trinidad and Tobago",
"Venezuela"="Venezuela, Bolivarian Republic of",
"Vietnam-b"="Viet Nam",
"Vietnam"="Viet Nam"), warn_missing=TRUE)
}
# ---- Balance table ----
# Get level names from variables (for balance table)
f_getlevelnames <- function(df, vars) {
unlist(sapply(vars, function(var) paste(var, levels(df[ , var])[-1], sep="_")))
}
# Add significance level star to table
f_addstar <- function(df, pvaluevar="p.value") {
pval <- df[ , pvaluevar]
if (is.null(pval)) {
stop("Variable not found.")
} else {
stars <- ifelse(pval <= .001, "***",
ifelse(pval <= .05, "**",
ifelse(pval <= .1, "*", "")))
}
return(cbind.data.frame(df, stars))
}
# Create a balance table based on MatchBalance result
f_create_balancetable <- function(df, balance_vars, bal_result) {
f_addstar(
cbind.data.frame(
f_getlevelnames(df, balance_vars),
ldply(bal_result, function(x)
data.frame(mean.Tr=x$mean.Tr, mean.Co=x$mean.Co, p.value=x$p.value))
)
)
}
# Re-order columns
# http://stackoverflow.com/questions/18339370/reordering-columns-in-a-large-dataframe
moveMe <- function(data, tomove, where = "last", ba = NULL) {
temp <- setdiff(names(data), tomove)
x <- switch(
where,
first = data[c(tomove, temp)],
last = data[c(temp, tomove)],
before = {
if (is.null(ba)) stop("must specify ba column")
if (length(ba) > 1) stop("ba must be a single character string")
data[append(temp, values = tomove, after = (match(ba, temp)-1))]
},
after = {
if (is.null(ba)) stop("must specify ba column")
if (length(ba) > 1) stop("ba must be a single character string")
data[append(temp, values = tomove, after = (match(ba, temp)))]
})
x
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment