Skip to content

Instantly share code, notes, and snippets.

@inkhorn
inkhorn / safe.max.r
Created April 29, 2012 01:39
Max Function that Returns NA when vector is full of NAs
# Maximum of a vector that ignores NAs, but returns NA instead of warning
# (and yielding -Inf) when there is nothing to take a maximum of.
#
# Args:
#   invector: an atomic vector, possibly containing NAs; may be empty or NULL.
#
# Returns:
#   NA when invector is empty or every element is NA; otherwise
#   max(invector, na.rm = TRUE).
safe.max <- function(invector) {
  # all() over a zero-length input is TRUE, so an empty vector takes this
  # branch as well. A ratio-of-NAs test would compute 0/0 = NaN for an empty
  # vector, and `if (NaN == 1)` raises an error.
  if (all(is.na(invector))) {
    return(NA)
  }
  max(invector, na.rm = TRUE)
}
@inkhorn
inkhorn / chisq_mining.r
Created May 2, 2012 00:40
Scripted example of using chisq to mine for relations between nominal variables
# Mine for relations between a set of nominal variables and DV_3lvls using
# chi-squared tests. Expects a data frame `big.dataset` (with a DV_3lvls
# column) to already exist in the workspace.

# Column positions in big.dataset of the variables to test against DV_3lvls.
testvars <- c(6,7,9,10,11,12,13,14,16, 17,18,19,20,21,23,24,25,26,384,375,376,386,385,387,388)

# Collect one entry per tested column in a preallocated list and bind once at
# the end; calling rbind() inside the loop re-copies the whole result on
# every iteration.
results.by.var <- vector("list", length(testvars))
for (k in seq_along(testvars)) {
  i <- testvars[k]
  # Chi-squared statistic for column i against the dependent variable.
  xsq <- chisq.test(big.dataset[,i], big.dataset$DV_3lvls)$statistic
  varname <- names(big.dataset)[i]
  # Cross-tabulation of the dependent variable by column i, kept for inspection.
  tab <- xtabs(~DV_3lvls + big.dataset[,i], data=big.dataset)
  results.by.var[[k]] <- list(chisq=xsq, testvar=varname, xtab=tab)
}
# List-matrix with one row per tested variable and columns chisq, testvar,
# xtab. do.call(rbind, list()) is NULL, so an empty testvars still yields NULL.
resultlist <- do.call(rbind, results.by.var)
@inkhorn
inkhorn / dedupe_records_w_less_info.r
Created May 4, 2012 01:42
Scripted example in R of removing records that have duplicate IDs but are missing other info
# Column positions of the name/contact fields. Each of these columns has been
# marked with a 1 or 0 depending on whether the field has anything in it.
# Column 22 is not part of the set.
bio_cols <- as.numeric(c(5:21, 23:26))
# Row numbers of all the records whose ID is a duplicate.
dupe_id_rows <- which(duplicated(big.dataset$ID))
@inkhorn
inkhorn / crossbarminmax.r
Created June 10, 2012 02:33
min median max crossbar with dots
# Crossbar plot of the min / median / max of S.Trial.3 by Sex, with the
# smokers' min and max overlaid as dots.
# Uses ddply (plyr), melt (reshape2) and ggplot2; those packages must be
# attached before this runs.

# Tab-separated data with a header row, read from the clipboard.
scents <- read.table("clipboard", header = TRUE, sep = "\t")

# One row per Sex with the 0%, 50% and 100% quantiles of S.Trial.3.
strial3.by.sex.wide <- ddply(
  scents, "Sex",
  function(x) quantile(x$S.Trial.3, c(0, .5, 1), na.rm = TRUE)
)

# Min and max of S.Trial.3 for smokers only, melted to long form so each
# value becomes one point (columns: Sex, Percentile, Time).
strial3.by.sex.smokers <- melt(
  ddply(
    subset(scents, Smoker == "Y"), "Sex",
    function(x) quantile(x$S.Trial.3, c(0, 1), na.rm = TRUE)
  ),
  variable.name = "Percentile",
  value.name = "Time"
)

# Every `+` sits at the END of a line: a line that begins with `+` starts a
# new statement in R, which would leave the theme and y-scale detached from
# the plot.
ggplot() +
  geom_crossbar(
    data = strial3.by.sex.wide,
    # Backticks refer to the quantile columns by name within `data`.
    aes(x = Sex, y = `50%`, ymin = `0%`, ymax = `100%`),
    fill = "#bcc927", width = .75
  ) +
  # stat is a layer argument, not an aesthetic, so it belongs outside aes().
  geom_point(
    data = strial3.by.sex.smokers,
    aes(x = Sex, y = Time),
    stat = "identity", size = 3
  ) +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10, hjust = 1),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, angle = 90, face = "bold")
  ) +
  scale_y_continuous(name = "Time to Completion")
@inkhorn
inkhorn / penultimax.r
Created September 14, 2012 01:45
Find the second highest value in a vector
penultimax = function(invector) {
# If the vector starts off as only having 1 or 0 numbers, return NA
if (length(invector) <= 1) {
return(NA)
}
first.max = safe.max(invector)
#Once we get the max, take it out of the vector and make newvector
newvector = invector[!invector == first.max]
#If newvector now has nothing in it, return NA
if (length(newvector) == 0) {
# Here's where I extract the database IDs and repeat them 50 times to make the column long enough for
# my new long-form dataset (596,100 rows)
client.data.new = rep(client.data[,1],50)
for (i in 2:32){
# for each column in the first 31 after the ID column, find the 49 matching columns
# to the right and stack them using melt
stacked.data = melt(client.data, id.vars="CnBio_ID", measure.vars=seq(i,(i+(31*49)),31), value.name=names(client.data)[i])
@inkhorn
inkhorn / toronto_casino.r
Created May 2, 2013 01:01
Casino Analysis
library(ff)
library(ffbase)
library(stringr)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(RgoogleMaps)
# The data set is loaded as two copies so that one can be converted while the
# other keeps the original text values.
casino <- read.csv(file = "/home/inkhorn/Downloads/casino_survey_results20130325.csv")
@inkhorn
inkhorn / toronto.casino.glm.r
Created May 17, 2013 18:57
toronto casino glm results
Call:
glm(formula = casino$Q6 == "City of Toronto" ~ GoBigorGoHome +
TechnicalDetails + Soc.Env.Issues, family = binomial(logit),
data = casino)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.6426 -0.4745 -0.1156 0.4236 3.4835
Coefficients:
@inkhorn
inkhorn / adj.mun.cacsino.glm.r
Created May 17, 2013 18:59
adjacent municipality casino glm
Call:
glm(formula = casino$Q6 == "Adjacent Municipality" ~ GoBigorGoHome +
TechnicalDetails + Soc.Env.Issues, family = binomial(logit),
data = casino)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.0633 -0.7248 -0.5722 -0.3264 2.7136
Coefficients:
@inkhorn
inkhorn / neither.casino.glm.r
Created May 17, 2013 19:00
neither casino glm
Call:
glm(formula = casino$Q6 == "Neither" ~ GoBigorGoHome + TechnicalDetails +
Soc.Env.Issues, family = binomial(logit), data = casino)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4090 -0.7344 -0.3934 0.8966 2.7194
Coefficients:
Estimate Std. Error z value Pr(>|z|)