Last active
August 29, 2015 14:19
-
-
Save benfb/cf43f03fae0529652fd5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# basic data setup
# NOTE(review): `d` is assigned twice below (CSV first, then TSV), so the
# second read replaces the first. These look like alternative datasets;
# run only the loader you need.
library(ggplot2)

# comma-separated file with a header row
d <- read.csv(
  "/Users/ben/Documents/hampshire/S2015/collapse_phenomena/resistance.csv",
  header = TRUE,
  sep = ","
)

# load() restores the objects stored in the .rda file into the workspace;
# presumably this is where `da34933.0001` (used below) comes from
load("/Users/ben/Downloads/drugs/DS0001/34933-0001-Data.rda")

# tab-separated file with a header row
d <- read.table(
  "/Users/ben/Downloads/adolescenthealth/DS0001/21600-0001-Data.tsv",
  sep = "\t",
  header = TRUE
)

data <- da34933.0001 # store the dataframe in an easily accessible variable
# Convert to actual numerical variables
library(prettyR)

# sorted factor levels of PSILCY
lbls <- sort(levels(data$PSILCY))
# strip the leading "(nn) " code from each level, keeping only the label text
lbls <- sub("^\\([0-9]+\\) +(.+$)", "\\1", lbls)
# pull the code between the parentheses out of each value (leading zeros
# dropped) and store it as a number
data$PSILCY <- as.numeric(sub("^\\(0*([0-9]+)\\).+$", "\\1", data$PSILCY))
# attach the label text to the numeric codes (prettyR's add.value.labels)
data$PSILCY <- add.value.labels(data$PSILCY, lbls)
# plot amount of responses in each category for ketamine
# (built with ggplot() rather than qplot(), which newer ggplot2 deprecates)
p <- ggplot(data, aes(x = KETAMINE)) +
  geom_bar() +
  labs(x = "Ketamine", y = "Responses")
# scatterplot of two drugs
# (built with ggplot() rather than qplot(), which newer ggplot2 deprecates)
p <- ggplot(data, aes(x = KETAMINE, y = PSILCY)) +
  geom_point() +
  labs(x = "Ketamine", y = "Psilocybin")
# hexbin
# NOTE(review): `Cigarette` is only loaded further down in this file
# (data(Cigarette) in the "ecdata" section); run that first.
ggplot(Cigarette, aes(x = income, y = packpc)) +
  stat_binhex()
# correlations
# use = "complete.obs" computes the correlation on rows with no missing values
cor(data$KETAMINE, data$PSILCY, use = "complete.obs")
# NOTE(review): `m` is built in the "standard deviation graph" section below
cor(m$grc, m$mean)
# save image
# writes the plot stored in `p` to file.png in the working directory
ggsave(filename = "file.png", plot = p)
# count variables
library(plyr)
# tabulate how often each distinct PSILCY value occurs
count(data, vars = "PSILCY")
# p value test
# summary() of a linear model prints the coefficient table with p-values
# NOTE(review): `check5` is not defined anywhere in this file
summary(lm(RowMean ~ ID, data = check5))
summary(lm(mean ~ grc, data = m))
# get average of every 5 rows
# matrix() fills column by column, so each column holds 5 consecutive values
# and colMeans() returns one mean per group of 5
# NOTE(review): assumes length(d$steps) is a multiple of 5 -- confirm
colMeans(matrix(d$steps, nrow = 5))
# standard deviation graph
library(matrixStats)

# reshape once: each column holds one group of 5 consecutive rows
steps_by_group <- matrix(d$steps, nrow = 5)
grc_by_group <- matrix(d$grc, nrow = 5)

# per-group mean and SD of steps, plus the per-group mean of grc
m <- data.frame(
  mean = colMeans(steps_by_group),
  sd = colSds(steps_by_group),
  grc = colMeans(grc_by_group)
)

# aes() looks names up in the plot data, so columns are referenced by bare
# name; writing `m$grc` inside aes() bypasses that lookup
ggplot(m, aes(x = grc, y = mean, colour = grc)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2) +
  geom_line() +
  geom_point()
# easier SD and mean calculations and plotting
library(plyr)

# one row per grc value with the mean and standard deviation of steps
ds <- ddply(d, .(grc), summarise, mean = mean(steps), sd = sd(steps))

# raw observations, with the group means and mean +/- sd error bars in red
# (geom_errorbar only needs x, ymin and ymax, so no `y` is mapped there)
ggplot() +
  geom_point(data = d, aes(x = grc, y = steps)) +
  geom_point(data = ds, aes(x = grc, y = mean), colour = "red", size = 3) +
  geom_errorbar(
    data = ds,
    aes(x = grc, ymin = mean - sd, ymax = mean + sd),
    colour = "red",
    width = 0.4
  )

# group means joined by a line, with mean +/- sd error bars
ggplot(ds, aes(x = grc, y = mean)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2) +
  geom_line() +
  geom_point()

# same plot coloured by mean, with axis labels and a text annotation
p <- ggplot(
  ds,
  aes(x = grc, y = mean, ymin = mean - sd, ymax = mean + sd, colour = mean)
) +
  geom_errorbar(width = 1.5) +
  geom_line() +
  geom_point() +
  labs(x = "gain-reduction-chance", y = "mean steps") +
  annotate(
    "text",
    label = "p < .01",
    x = 60,
    y = 250,
    size = 8,
    colour = "red"
  )
# line of best fit
# The plot stored in `p` has grc on x and mean on y, so regress mean on grc;
# the intercept and slope then describe a line in the plot's coordinates.
# (The result is not stored in a variable called `c`, which would mask
# base::c.)
best_fit <- coef(lm(mean ~ grc, data = ds))
# add the dashed fitted line as a layer on top of the existing plot
p + geom_abline(intercept = best_fit[1], slope = best_fit[2], linetype = 2)
# graph with limits
# both axes are restricted to the range 0-100
p <- ggplot(d, aes(x = H1DA9, y = H1DA10)) +
  geom_point() +
  labs(x = "Videos", y = "Games") +
  xlim(c(0, 100)) +
  ylim(c(0, 100))
# graph with sizes
# stat_sum() counts observations at each location; the count (`n`) drives
# point size. The plain point layer is kept underneath, as before.
# NOTE(review): `..n..` is the older spelling of after_stat(n); it is kept so
# the snippet still runs on older ggplot2 releases.
p <- ggplot(d, aes(x = H1DA9, y = H1DA10)) +
  geom_point() +
  labs(x = "Videos", y = "Games") +
  stat_sum(aes(size = ..n..))
# ecdata
library(ggplot2)
library(Ecdat)
data(Cigarette)

# plain scatterplot of packs per capita against income
p <- ggplot(Cigarette, aes(x = income, y = packpc)) +
  geom_point() +
  labs(x = "State Personal Income", y = "Packs Per Capita")

# same data coloured by state; this assignment replaces the plot above
p <- ggplot(Cigarette, aes(x = income, y = packpc, colour = state)) +
  geom_point() +
  labs(
    x = "income",
    y = "mean packs per capita",
    title = "Cigarette Use Related To Income"
  )

p + guides(colour = guide_legend(nrow = 12)) # divide legend into columns
# make pretty interactive graphs with plot.ly
library(plotly)

# Credentials are read from the environment so the API key is not committed
# with the script. Any key that was previously pasted here should be
# regenerated in the plot.ly account settings.
plotly_user <- Sys.getenv("plotly_username")
plotly_key <- Sys.getenv("plotly_api_key")
if (!nzchar(plotly_user) || !nzchar(plotly_key)) {
  stop(
    "Set the plotly_username and plotly_api_key environment variables",
    call. = FALSE
  )
}

py <- plotly(user = plotly_user, key = plotly_key)
# NOTE(review): called without arguments; presumably this sends the most
# recently drawn ggplot -- confirm against the installed plotly version
response <- py$ggplotly()
# Austin crime mapping
library(ggmap)

# black-and-white base map for the search term "austin" at zoom level 12
AustinMap <- qmap("austin", zoom = 12, color = "bw", legend = "topleft")

# overlay one point per row of `d`, coloured by crime type
AustinMap +
  geom_point(
    aes(x = LONGITUDE, y = LATITUDE, colour = Crime.Type),
    data = d
  )
# subset based on if name contains string
# keeps the rows of `d` whose Crime.Type contains "ASSAULT"
subset(d, grepl("ASSAULT", Crime.Type))
# add themes to graphs
library(ggthemes)
# apply the ggthemes "economist" theme to the plot stored in `p`
p + theme_economist()
# calculate confidence interval
# Half-width of a two-sided 95% t interval: the 0.975 t quantile with
# SAMPLESIZE - 1 degrees of freedom, times STDEV / sqrt(SAMPLESIZE).
# NOTE(review): SAMPLESIZE and STDEV are not defined in this file --
# presumably placeholders to replace with your own values.
error <- qt(0.975, df = SAMPLESIZE - 1) * STDEV / sqrt(SAMPLESIZE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment