Skip to content

Instantly share code, notes, and snippets.

View stevenworthington's full-sized avatar

Steven Worthington stevenworthington

View GitHub Profile
@stevenworthington
stevenworthington / barplot_values_ggplot2.R
Created March 29, 2012 19:56
barplot_values_ggplot2
library(ggplot2)
library(plyr)
data(mpg)
# summarise highway mileage for every class/year combination:
# hwy.avg is the mean of hwy and hwy.sd its standard deviation
hwy.means <- ddply(
  .data = mpg,
  .variables = c("class", "year"),
  .fun = summarize,
  hwy.avg = mean(hwy),
  hwy.sd = sd(hwy)
)
# barplot with values over bars
# hwy.means already holds one summary value per class/year, so the bars have
# to draw hwy.avg as-is: stat = "identity" is required because geom_bar()'s
# default stat counts rows and rejects a y aesthetic in current ggplot2.
# Bars for the two years are placed side by side (position = "dodge").
ggplot(hwy.means) +
  geom_bar(
    aes(class, hwy.avg, fill = factor(year)),
    stat = "identity",
    position = "dodge",
    colour = "black",
    size = 0.3
  ) +
@stevenworthington
stevenworthington / formula_creation.R
Created April 4, 2012 15:36
concatenate variables into a formula
# Build a model formula from many column names without typing each one out.
# The example uses the High School and Beyond data set.
hsb_df <- read.table(
  file = "http://www.ats.ucla.edu/stat/R/notes/hs0.csv",
  header = TRUE,
  sep = ","
)
# the predictors are the names of columns 4:8 and 10:11; pasting them after
# the "math ~ 1" stem with " + " gives the text of the full formula
hsb_form <- formula(
  paste(
    c("math ~ 1", colnames(hsb_df[, c(4:8, 10:11)])),
    collapse = " + "
  )
)
# fit the linear model described by that formula
fit1 <- lm(formula = hsb_form, data = hsb_df)
@stevenworthington
stevenworthington / ipak.R
Created July 25, 2012 19:44
Install and load multiple R packages at once
# ipak function: install and load multiple R packages.
# Checks which of the requested packages are missing from the installed
# package list, installs those (with their dependencies), then attaches
# every requested package to the R session.
#
# Args:
#   pkg: character vector of package names.
#
# Returns:
#   A logical vector named by `pkg`: TRUE where the package was attached,
#   FALSE where require() could not load it. The result is always logical,
#   including for empty input.
ipak <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # vapply() pins the result type to logical; sapply() would return list()
  # for empty input
  vapply(pkg, require, logical(1), character.only = TRUE)
}
@stevenworthington
stevenworthington / centroid_perm.R
Created July 25, 2012 19:50
Permutation test for group differences using 3D coordinate data
# ===============================================================================
# Name : centroid_perm
# Original author : Steven Worthington (sworthington@iq.harvard.edu)
# Affiliation : IQSS, Harvard University
# Date (mm/dd/yyyy) : 06/14/2012
# Version : v0.8
# Aim : exact permutation test for group differences
# ===============================================================================
# Goal:
@stevenworthington
stevenworthington / k_medoids_uncent_corr.R
Last active March 4, 2021 10:58
Calculate K-medoids using the uncentered correlation distance method
# example of calculating K-medoids using the uncentered
# correlation metric as a measure of distance
# 0) load the mtcars data set into the workspace
data("mtcars")
# 1) create a distance matrix using the "cosine of the angle" method (aka, uncentered correlation)
@stevenworthington
stevenworthington / lme4_contrast_example.R
Created July 11, 2013 19:23
Example of how to create custom contrasts to test hypotheses in lme4 models.
# Note: requires loading the "socsub" data frame (not a bundled R dataset)
# ------------------------------------------------------------------------------------
# pairwise comparisons including interactions
# an lm fit is used here to obtain the design matrix
model1 <- lm(
  formula = agro.rec.tot ~ sex * ageclass + loggrpmem,
  data = socsub,
  offset = logtimeage
)
# a named list holding two single-string character vectors
text <- list(
  a = "all day I play @sworth with R",
  b = "all night I play @sworth with R"
)
# pull out the word characters that follow "@" in one string
sub(pattern = "^.*@(\\w+).*", replacement = "\\1", x = text[["a"]])
# same extraction across the whole list; gsub() coerces a non-character
# input with as.character(), so each list element is treated as one string
gsub(pattern = "^.*@(\\w+).*", replacement = "\\1", x = text)
library(sp)
library(maptools)
# read the North Carolina SIDS shapefile that is looked up inside the
# maptools package, using FIPSNO as the polygon ID variable
NC <- readShapePoly(
  system.file("shapes", "sids.shp", package = "maptools")[1L],
  proj4string = CRS("+proj=longlat +ellps=clrk66"),
  IDvar = "FIPSNO"
)
# draw the polygons with blue borders, axes shown, axis labels horizontal
plot(NC, axes = TRUE, las = 1, border = "blue")
@stevenworthington
stevenworthington / caliper_text.R
Last active August 29, 2015 13:56
Extract blocks of text based on patterns
# set working directory
# (dir_path is a placeholder; replace it with the folder holding the text files)
dir_path <- "path_to_text_files"
setwd(dir_path)
# create vector of filenames
# List the working directory that was just entered rather than dir_path again:
# a relative dir_path would otherwise be resolved a second time, inside the
# new working directory, and list.files() would silently return character(0).
filenames <- list.files(".")
# read in files to a list: one element per file, each a character vector
# produced by scan() splitting the file on newlines
docList <- lapply(filenames, scan, what = "character", sep = "\n")
x <- c(
'knitr', # A general-purpose package for dynamic report generation in R.
# 'sqldf', # For running SQL statements on R data frames, optimized for convenience.
'randomForest', # Classification and regression based on a forest of trees using random inputs.
'arm', # R functions for processing lm, glm, svy.glm, mer and polr outputs.
'ggplot2', # An implementation of the Grammar of Graphics.
'gridExtra', # misc. high-level Grid functions
'plyr', # Tools for splitting, applying and combining data.
'tree', # Classification and regression trees.
'gbm', # Generalized Boosted Regression Models