Skip to content

Instantly share code, notes, and snippets.

View demel's full-sized avatar

De'Mel demel

View GitHub Profile
# Simplest possible marimekko/mosaic plot

# Package setup. Change doInstall to FALSE if you don't want packages
# installed from CRAN on every run.
doInstall <- TRUE
toInstall <- c("vcd", "ggplot2", "RColorBrewer")
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
# Attach each package; character.only = TRUE makes library() treat every
# element of toInstall as a package-name string.
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Make the gray ggplot2 theme with a 7-point base size the active theme.
theme_set(theme_gray(base_size = 7))
# All you need to start with is individual count data, and a grouping variable
# Package setup for the k-means++ code: optionally install, then attach
# "proxy". Relies on doInstall having been set earlier in the file.
toInstall <- "proxy"
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)
# kmeans++ center initialization algorithm
kMeansPP <- function(df, k, doPlot = TRUE){
kCenters <- data.frame(matrix(NA, ncol = ncol(df), nrow = k))
whichPoints <- rep(NA, k)
whichPoints[1] <- sample(1:nrow(df), 1)
kCenters[1, ] <- df[whichPoints[1], ] # Initial center
# Package setup: set doInstall to FALSE to skip installing from CRAN.
doInstall <- TRUE
toInstall <- "ggplot2"
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Read the ANES data over HTTP.
ANES <- read.csv(
  file = "http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv"
)
# Limit to just 2008 respondents and remove some non-helpful variables
# (columns 1, 11 and 17, dropped by position).
ANES <- ANES[ANES$year == 2008, c(-1, -11, -17)]
head(ANES)
# Fit several models with the same DV:
# Package setup: set doInstall to FALSE to skip installing from CRAN.
doInstall <- TRUE
toInstall <- c("Amelia", "ggplot2")
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Read the ANES data over HTTP.
ANES <- read.csv(
  file = "http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv"
)
# Limit to just 2008 respondents and remove some non-helpful variables
# (columns 1, 11 and 17, dropped by position).
ANES <- ANES[ANES$year == 2008, c(-1, -11, -17)]
head(ANES)

# Scatterplot of pid7 against ideo7; both are jittered so that overlapping
# points become visible.
with(
  ANES,
  plot(jitter(pid7), jitter(ideo7))
)
# Purpose: model the relationship between effect size, sample size and power,
# holding the significance level constant (p = 0.05), for three of the most
# common statistical tests:
# 1) t-Test (two-sample)
# then visualize the results for small, medium and large effect sizes
# (0.2, 0.5, 0.8; see http://www.uccs.edu/lbecker/effect-size.html)

# Needs the pwr package. Install it only when it is not already available,
# then attach it with library() so that a failed load stops the script with
# an error (require() would only warn and return FALSE).
if (!requireNamespace("pwr", quietly = TRUE)) {
  install.packages("pwr")
}
library("pwr")
# t-TEST
# Package setup: set doInstall to FALSE to skip installing from CRAN.
doInstall <- TRUE
toInstall <- c("twitteR", "dismo", "maps", "ggplot2")
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

searchTerm <- "#rstats"
# Gather Tweets (up to 1000 matching the search term)
searchResults <- searchTwitter(searchTerm, n = 1000)
# Convert to a nice dF
tweetFrame <- twListToDF(searchResults)
# Batch lookup of user info for the screen names found in the tweets
userInfo <- lookupUsers(tweetFrame$screenName)
# Package setup: set doInstall to FALSE to skip installing from CRAN.
doInstall <- TRUE
toInstall <- "ggplot2"
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Read the ANES data over HTTP.
ANES <- read.csv(
  file = "http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv"
)
# Limit to just 2008 respondents and remove some non-helpful variables
# (columns 1, 11 and 17, dropped by position).
ANES <- ANES[ANES$year == 2008, c(-1, -11, -17)]
head(ANES)
# Fit several models with the same DV:
# Package setup. Change doInstall to FALSE if you don't want packages
# installed.
doInstall <- TRUE
toInstall <- c("sna", "ggplot2", "Hmisc", "reshape2")
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Empty ggplot2 theme: start from theme_bw() and replace the line, rect and
# strip.text elements with element_blank().
new_theme_empty <- theme_bw()
new_theme_empty$line <- element_blank()
new_theme_empty$rect <- element_blank()
new_theme_empty$strip.text <- element_blank()
# Package setup. Change doInstall to FALSE if you don't want packages
# installed.
doInstall <- TRUE
toInstall <- c("maptools", "rgdal", "ggplot2", "spatstat", "RColorBrewer")
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Taking an online compressed shapefile, and opening it in R
# From http://stackoverflow.com/a/3053883
# 110th & 111th Congressional District Shapefiles are downloaded to a
# temporary file. See http://www.census.gov/geo/www/cob/cd110.html#shp
temp <- tempfile()
download.file(
  url = "http://www.census.gov/geo/cob/bdy/cd/cd110shp/cd99_110_shp.zip",
  destfile = temp
)
# Package setup: set doInstall to FALSE to skip installing from CRAN.
doInstall <- TRUE
toInstall <- c("ggplot2", "cluster", "MASS", "smacof")
if (doInstall) {
  install.packages(pkgs = toInstall, repos = "http://cran.us.r-project.org")
}
lapply(X = toInstall, FUN = library, character.only = TRUE)

# Generate a matrix of dissimilarities from pairwise correlations
# Preference orderings of breakfast items, from smacof
data(breakfast)
# Correlations between columns; use = "pair" is matched by cor() to its
# pairwise-complete handling of missing values.
corrMat <- cor(x = breakfast, use = "pair")
# Distances between the rows of the correlation matrix (dist() defaults).
distMat <- dist(x = corrMat)