# /GATech.r

## GATech.r
# Read in the raw admissions data. read.csv() already returns a data.frame,
# so no as.data.frame() wrapper is needed.
dataAdmit <- read.csv("fall16.csv.txt")

# Keep only the rows that report a decision (neither NA nor an empty string).
has_decision <- !(is.na(dataAdmit$Decision) | dataAdmit$Decision == "")
dataAdmitClean <- dataAdmit[has_decision, ]

# Rather than try to replace the NAs -- for no other reason than time --
# just drop every row that contains one. In practice I might try regressing
# to predict what the values should be. I'd feel more comfortable with more
# features.
dataAdmitClean <- dataAdmitClean[complete.cases(dataAdmitClean), ]

# svm() fits a classifier only when the response is a factor. On R >= 4.0
# read.csv() keeps text columns as character, so convert explicitly.
# factor() also drops levels (such as "") that no longer occur after the
# filtering above.
dataAdmitClean$Decision <- factor(dataAdmitClean$Decision)

# Fail early with a readable message instead of an obscure error from svm().
if (nrow(dataAdmitClean) == 0L) {
  stop("No complete rows with a decision found in fall16.csv.txt",
       call. = FALSE)
}

# Now we have only complete data on which to train a model.
# So few data points are complete that the validity will be questionable.
# As the semesters progress, as reports come in, and as feature reporting
# becomes standard, I expect accuracy will increase. I take issue with
# "Selective school" being <25% admission, when some top CS schools have a
# higher overall admission rate but do not release their CS admission rate.
# National CS ranking of the school might be a better feature. Might include
# it later.

# ***NOTE***: The subsequent model is pretty much useless.
# I started playing around with an SVM and a decision tree briefly before I
# realized that there was insufficient data about rejections specifically to
# make a meaningful predictive model. Perhaps with more data about
# rejections, meaningful predictions could be made.

# Install e1071 only when it is missing, so that re-running the script does
# not reinstall the package (and hit the network) every time.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071", dependencies = TRUE)
}
library("e1071")

# Show the cleaned training data. An explicit print() also produces output
# when the script is run through source().
print(dataAdmitClean)

# Here I assumed dates were not significant factors, everybody would have
# 3 letters in, and that Selective/BigTech was sufficient.
admit_model <- svm(Decision ~ GPA + Selective + BigTech,
                   data = dataAdmitClean)
print(summary(admit_model))
	# NOTE(review): this tab-indented section repeats the pipeline that
	# appears earlier in the file; it looks like a pasted copy. Consider
	# removing one of the two copies.

	# Read in the raw admissions data. read.csv() already returns a
	# data.frame, so no as.data.frame() wrapper is needed.
	dataAdmit <- read.csv("fall16.csv.txt")

	# Keep only the rows that report a decision (neither NA nor an empty
	# string). The original line had a markdown-escaped "\|", which is not
	# valid R; the elementwise OR operator is "|".
	has_decision <- !(is.na(dataAdmit$Decision) | dataAdmit$Decision == "")
	dataAdmitClean <- dataAdmit[has_decision, ]

	# Rather than try to replace the NAs -- for no other reason than time --
	# just drop every row that contains one. In practice I might try
	# regressing to predict what the values should be. I'd feel more
	# comfortable with more features.
	dataAdmitClean <- dataAdmitClean[complete.cases(dataAdmitClean), ]

	# svm() fits a classifier only when the response is a factor. On
	# R >= 4.0 read.csv() keeps text columns as character, so convert
	# explicitly. factor() also drops levels (such as "") that no longer
	# occur after the filtering above.
	dataAdmitClean$Decision <- factor(dataAdmitClean$Decision)

	# Fail early with a readable message instead of an obscure svm() error.
	if (nrow(dataAdmitClean) == 0L) {
	  stop("No complete rows with a decision found in fall16.csv.txt",
	       call. = FALSE)
	}

	# Now we have only complete data on which to train a model.
	# So few data points are complete that the validity will be
	# questionable. As the semesters progress, as reports come in, and as
	# feature reporting becomes standard, I expect accuracy will increase.
	# I take issue with "Selective school" being <25% admission, when some
	# top CS schools have a higher overall admission rate but do not release
	# their CS admission rate. National CS ranking of the school might be a
	# better feature. Might include it later.

	# *NOTE*: The subsequent model is pretty much useless.
	# I started playing around with an SVM and a decision tree briefly
	# before I realized that there was insufficient data about rejections
	# specifically to make a meaningful predictive model. Perhaps with more
	# data about rejections, meaningful predictions could be made.

	# Install e1071 only when it is missing, so that re-running the script
	# does not reinstall the package (and hit the network) every time.
	if (!requireNamespace("e1071", quietly = TRUE)) {
	  install.packages("e1071", dependencies = TRUE)
	}
	library("e1071")

	# Show the cleaned training data. An explicit print() also produces
	# output when the script is run through source().
	print(dataAdmitClean)

	# Here I assumed dates were not significant factors, everybody would
	# have 3 letters in, and that Selective/BigTech was sufficient.
	admit_model <- svm(Decision ~ GPA + Selective + BigTech,
	                   data = dataAdmitClean)
	print(summary(admit_model))