Analysis code for looking at the relationship between project goal and % success from #SciFund. Note, I did it with my own private data set that has a lot of merged fields in it, but you can run this analysis or something like it with Zen's data at http:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("./sciFundFunctions.r") | |
library(gridExtra) | |
################################################# | |
#######HISTOGRAMS | |
################################################# | |
#total raised histogram distribution | |
raised<-qplot(total, data=projects) + | |
theme_bw(base_size=16) + | |
xlab("\nTotal Raised") + | |
ylab("Count") | |
mean(projects$total) | |
median(projects$total) | |
#total asked for | |
asked<-qplot(Goal, data=projects)+ | |
theme_bw(base_size=16) + | |
xlab("\nTotal Asked For") + | |
ylab("Count") | |
mean(projects$Goal) | |
median(projects$Goal) | |
################################################# | |
#####goal influence | |
################################################# | |
a<-qplot(Goal, percent,data=projects, size=I(4))+ | |
theme_bw(base_size=16) + | |
xlab("\nGoal") + | |
ylab("Percent Raised") + | |
geom_vline(xintercept=7500, colour="red", lty=2, size=2) + | |
geom_text(aes(x=15000, y=150, label="Zone of Failure?"), size=8) | |
b<-qplot(Goal, percent,data=projects, size=I(4))+ | |
theme_bw(base_size=16) + | |
xlab("\nGoal") + | |
ylab("Percent Raised") + | |
stat_smooth(method="glm", family=gaussian(link="log"), size=2) | |
grid.arrange(a,b,ncol=2) | |
##ANALYSIS OF CURVES | |
aglm<-glm(percent ~ Goal, family=gaussian(link="log"), data=projects) | |
Anova(aglm) | |
summary(aglm) | |
summary(lm(percent ~ aglm$fitted.values , data=projects))$r.squared #r2=0.22 | |
####BOX | |
#get the likelihood of the data given some breakpoint | |
boxFun<-function(params){ | |
breakpoint<-params[1] | |
sdlog<-abs(params[2]) | |
sd3<-abs(params[3]) | |
prec<-projects$percent | |
idx<-which(projects$Goal>breakpoint) | |
-2*sum(dlnorm(prec[-idx], mean(prec[-idx]), sdlog, log=T))+sum(dlnorm(prec[idx], 0, sd3, log=T)) | |
} | |
#brute force solve - yes, it's brute force, but, optim was being squirely with my function above | |
x<-7500 | |
m<- boxFun(c(x,5,6)) | |
for(y in seq(1,100,.1)){ | |
for(z in seq(1,100,.1)){ | |
a<-boxFun(c(x,y,z)) | |
if(a<m) { | |
b<-c(x,y, z) | |
m<-a | |
} | |
}} | |
######AIC comparison | |
m+2*2 - AICc(aglm) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment