You can clone with HTTPS or SSH.
# File created 1/31/13
# Contains R code to
#   - read in the Kaggle Competition Titanic data csv files
#   - create a simple logistic regression model
#   - make predictions on training and test data
#   - write out test predictions to a csv file
#
# Replace the <your path here> with the full path to your copy of the train
# and test csv files.
###############################################################################
# Create a Kaggle account: http://www.kaggle.com/account/register
# Read and agree to the rules if you choose to continue.
# Enter the Kaggle Titanic Competition:
#   http://www.kaggle.com/c/titanic-gettingStarted
# Download train.csv and test.csv.
# Obtain/download R from http://www.r-project.org/
# (you will have to choose a 'mirror' or site - usually a university or
# research site).

# Read the training data into a data frame called train.
train <- read.table("C:/Users/<your path here>/train.csv",
                    header = TRUE, sep = ",")

# Set pclass, the passenger's pseudo-class, to be an ordered categorical
# variable (3rd class lowest, 1st class highest).
train$pclass <- factor(train$pclass, levels = c(3, 2, 1), ordered = TRUE)

# Create a truth vector of survival results from the training data.
S <- train$survived == 1

# Read the test data into a data frame named test.
test <- read.table("C:/Users/<your path here>/test.csv",
                   header = TRUE, sep = ",")

# pclass is an ordered categorical for the test data also; the levels must
# match the training data so predict() can apply the fitted contrasts.
test$pclass <- factor(test$pclass, levels = c(3, 2, 1), ordered = TRUE)

# Create a super simple logistic regression model with the training data,
# predicting survival based on passenger class and sex.
logistic.model <- glm(survived ~ pclass + sex, family = binomial(),
                      data = train)

# Generate predicted survival probabilities for the training data using the
# predict method of the logistic model.
training_predictions <- predict(logistic.model, type = "response")

# Compute the training error using an outcome cutoff at 0.5.
training_error <- sum((training_predictions >= 0.5) != S) / nrow(train)

# Show the training error and the training accuracy. print() is used so the
# values are displayed even when the script is run through source().
print(training_error)
print(1 - training_error)

# Generate predicted survival probabilities for the test data.
test_predictions <- predict(logistic.model, test, type = "response")

# Convert probabilities to predictions in {0, 1} using a probability cutoff
# of 0.5 for the outcome of survived; missing predictions default to
# deceased (0).
test_predictions <- ifelse(
  !is.na(test_predictions) & test_predictions >= 0.5,
  1,
  0
)

# Write out the test_predictions to a comma separated value (csv) file:
# one prediction per line, no header row, no row names.
write.table(test_predictions, "C:/Users/<your path here>/predictions.csv",
            col.names = FALSE, row.names = FALSE, quote = FALSE)

# Submit your predictions.csv file to Kaggle.com to view the resulting test
# data score.
Super Simple Logistic Regression for Kaggle.com Titanic Competition
Super Simple Logistic Regression for Kaggle.com Titanic Competition