Skip to content

Instantly share code, notes, and snippets.

@benfb
Last active August 29, 2015 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benfb/cf43f03fae0529652fd5 to your computer and use it in GitHub Desktop.
Save benfb/cf43f03fae0529652fd5 to your computer and use it in GitHub Desktop.
# Basic data setup ----
library(ggplot2)

# Comma-separated file with a header row (both are read.csv() defaults).
d <- read.csv("/Users/ben/Documents/hampshire/S2015/collapse_phenomena/resistance.csv")

# Presumably this .rda is what provides `da34933.0001`, used below -- confirm.
load("/Users/ben/Downloads/drugs/DS0001/34933-0001-Data.rda")

# Tab-separated file with a header row.
# NOTE: this re-assigns `d`, replacing the data frame read from the CSV above.
d <- read.table(
  "/Users/ben/Downloads/adolescenthealth/DS0001/21600-0001-Data.tsv",
  header = TRUE,
  sep = "\t"
)

# Keep the data frame under a shorter, easier-to-type name.
data <- da34933.0001
# Convert to actual numerical variables
# Turns responses of the form "(<code>) <label>" into their numeric codes while
# keeping the label text attached to the values.
library(prettyR)
# Collect the distinct response strings. This has to run before PSILCY is
# overwritten below. Assumes PSILCY is a factor (levels() returns NULL
# otherwise) -- TODO confirm.
lbls <- sort(levels(data$PSILCY))
# Strip the leading "(<digits>) " prefix so only the label text remains.
lbls <- (sub("^\\([0-9]+\\) +(.+$)", "\\1", lbls))
# Keep only the digits inside the parentheses (leading zeros dropped) and
# convert them to numbers.
data$PSILCY <- as.numeric(sub("^\\(0*([0-9]+)\\).+$", "\\1", data$PSILCY))
# Attach the label text to the numeric codes.
# NOTE(review): sort() orders the levels as strings, so labels line up with the
# codes only if the codes sort the same way textually (e.g. zero-padded) --
# verify against the data.
data$PSILCY <- add.value.labels(data$PSILCY, lbls)
# Bar chart: number of responses in each ketamine category.
p <- qplot(
  KETAMINE,
  data = data,
  geom = "bar",
  xlab = "Ketamine",
  ylab = "Responses"
)

# Scatterplot of two drugs (replaces the bar chart previously stored in `p`).
p <- qplot(
  KETAMINE, PSILCY,
  data = data,
  xlab = "Ketamine",
  ylab = "Psilocybin"
)

# Hexbin: hexagonal binning of packpc against income.
ggplot(Cigarette, aes(x = income, y = packpc)) +
  stat_binhex()
# Correlations ----
# Correlation between the two drug columns, using complete observations only.
with(data, cor(KETAMINE, PSILCY, use = "complete.obs"))
# Correlation between the `grc` and `mean` columns of `m`.
with(m, cor(grc, mean))

# Save image: write the plot stored in `p` to file.png.
ggsave("file.png", plot = p)
# Count variables: frequency of each distinct PSILCY value.
library(plyr)
count(data, vars = "PSILCY")

# p value test: summary() of each linear fit prints the coefficient table,
# including the p-values.
summary(
  lm(RowMean ~ ID, data = check5)
)
summary(
  lm(mean ~ grc, data = m)
)

# Get average of every 5 rows: matrix() fills column by column, so with
# nrow = 5 each column holds 5 consecutive values of d$steps.
colMeans(
  matrix(d$steps, nrow = 5)
)
# Standard deviation graph ----
# Summarises `steps` and `grc` over consecutive groups of 5 rows of `d`, then
# plots the group mean of `steps` (+/- one standard deviation) against the
# group mean of `grc`.
library(matrixStats)

# matrix() fills column by column, so with nrow = 5 each column holds 5
# consecutive rows of `d`. Build each matrix once and reuse it.
# NOTE(review): if nrow(d) is not a multiple of 5, matrix() recycles values
# (with a warning) to fill the last column.
steps_by_group <- matrix(d$steps, nrow = 5)
grc_by_group <- matrix(d$grc, nrow = 5)

# Name the columns at construction instead of patching names() afterwards.
m <- data.frame(
  mean = colMeans(steps_by_group),
  sd = colSds(steps_by_group),
  grc = colMeans(grc_by_group)
)

# Use bare column names inside aes(): `m$col` bypasses the layer's data (it
# breaks with facets or per-layer data) and leaks into the axis titles.
ggplot(m, aes(x = grc, y = mean, colour = grc)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2) +
  geom_line() +
  geom_point()
# Easier SD and mean calculations and plotting ----
library(plyr)

# One row per distinct grc value, holding the mean and SD of steps.
ds <- ddply(
  d, "grc", summarise,
  mean = mean(steps),
  sd = sd(steps)
)

# Raw observations in black, with the per-grc mean and +/- 1 SD error bars
# overlaid in red.
ggplot() +
  geom_point(data = d, aes(x = grc, y = steps)) +
  geom_point(
    data = ds,
    aes(x = grc, y = mean),
    colour = 'red',
    size = 3
  ) +
  geom_errorbar(
    data = ds,
    aes(x = grc, y = mean, ymin = mean - sd, ymax = mean + sd),
    colour = 'red',
    width = 0.4
  )

# Summary only: means joined by a line, with +/- 1 SD error bars.
ggplot(ds, aes(x = grc, y = mean)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2) +
  geom_line() +
  geom_point()

# Same summary coloured by mean, with axis labels and a text annotation;
# stored in `p` for later use.
p <- ggplot(
  ds,
  aes(x = grc, y = mean, ymin = mean - sd, ymax = mean + sd, colour = mean)
) +
  geom_errorbar(width = 1.5) +
  geom_line() +
  geom_point() +
  labs(x = "gain-reduction-chance", y = "mean steps") +
  annotate(
    "text",
    label = "p < .01",
    x = 60, y = 250,
    size = 8,
    colour = "red"
  )
# Line of best fit ----
# The plots of `ds` put grc on the x axis and mean on the y axis, so the model
# has to regress mean on grc for its intercept and slope to be in plot
# coordinates (the previous `grc ~ mean` fit described the inverse relation).
# The result is kept in `fit_coefs` rather than `c` to avoid shadowing base::c.
fit_coefs <- coef(lm(mean ~ grc, data = ds))

# A layer draws nothing on its own, so add it to the plot stored in `p`.
# [[ ]] drops the coefficient names before they reach the layer.
p + geom_abline(
  intercept = fit_coefs[["(Intercept)"]],
  slope = fit_coefs[["grc"]],
  linetype = 2
)
# Graph with limits: both axes restricted to the range 0-100.
p <- qplot(H1DA9, H1DA10, data = d, xlab = "Videos", ylab = "Games") +
  xlim(0, 100) +
  ylim(0, 100)

# Graph with sizes: point size mapped to the number of observations at each
# location.
p <- qplot(H1DA9, H1DA10, data = d, xlab = "Videos", ylab = "Games") +
  stat_sum(aes(size = ..n..))
# Ecdat example data ----
library(ggplot2)
library(Ecdat)
data("Cigarette")

# Quick scatterplot of packs per capita against income.
p <- qplot(
  income, packpc,
  data = Cigarette,
  xlab = "State Personal Income",
  ylab = "Packs Per Capita"
)

# Same relationship with one colour per state and a title (replaces `p`).
p <- ggplot(Cigarette, aes(x = income, y = packpc, colour = state)) +
  geom_point() +
  labs(
    x = "income",
    y = "mean packs per capita",
    title = "Cigarette Use Related To Income"
  )

# Limit the colour legend to 12 rows so it wraps into several columns.
p + guides(col = guide_legend(nrow = 12))
# Make pretty interactive graphs with plot.ly ----
library(plotly)

# Credentials must not live in the script: the API key is read from the
# `plotly_api_key` environment variable (e.g. set in ~/.Renviron).
# NOTE(review): the key that used to be hard-coded here has been published and
# should be revoked.
plotly_key <- Sys.getenv("plotly_api_key")
if (!nzchar(plotly_key)) {
  stop(
    "Set the plotly_api_key environment variable before running this snippet.",
    call. = FALSE
  )
}

py <- plotly(user = "bfb", key = plotly_key)
# Send a ggplot to plot.ly and keep the service's response.
response <- py$ggplotly()
# Austin crime mapping ----
library(ggmap)

# Black-and-white base map of "austin" at zoom 12, legend in the top left.
AustinMap <- qmap(
  "austin",
  zoom = 12,
  color = "bw",
  legend = "topleft"
)

# Overlay one point per row of `d`, coloured by Crime.Type.
AustinMap +
  geom_point(
    data = d,
    aes(x = LONGITUDE, y = LATITUDE, colour = Crime.Type)
  )

# Subset based on whether Crime.Type contains a string.
subset(d, grepl("ASSAULT", Crime.Type))
# Add themes to graphs: apply the Economist theme to the plot stored in `p`.
library(ggthemes)
p +
  theme_economist()
# Calculate confidence interval: margin of error of a two-sided 95%
# t-interval (0.975 quantile, SAMPLESIZE - 1 degrees of freedom).
# SAMPLESIZE and STDEV appear to be placeholders to fill in -- confirm.
error <-
  qt(0.975, df = SAMPLESIZE - 1) *
  STDEV / sqrt(SAMPLESIZE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment