Skip to content

Instantly share code, notes, and snippets.

@flxw
Last active January 11, 2018 11:46
Show Gist options
  • Save flxw/5a0d3dda72477ec628ee287f515a7c9c to your computer and use it in GitHub Desktop.
Save flxw/5a0d3dda72477ec628ee287f515a7c9c to your computer and use it in GitHub Desktop.
# Pull in the project's loading/preprocessing helpers.
source("load_data.R")

# Labelled ("known") data set. `delivery_date` is dropped because, per the
# original author's note, removing it gets rid of the NAs.
d <- subset(
  read_and_preprocess_data_file("data/BADS_WS1718_known.csv"),
  select = -delivery_date
)

# "Class" data set, prepared the same way as `d`.
classdata <- subset(
  read_and_preprocess_data_file("data/BADS_WS1718_class.csv"),
  select = -delivery_date
)
# Train the final model with .632 bootstrapping.
#
# Each iteration draws row indices of `d` with replacement, uses the distinct
# drawn rows as the training set and the rows that were never drawn as the
# test set. Two per-iteration logs are kept:
#   probs - share of distinct rows of `d` that ended up in the training set
#   accs  - accuracy reported by the model code for that iteration
n_iterations <- 400L
n_orders <- nrow(d)

# Preallocate the logs. They were previously appended to without ever being
# created, which stops the script with "object 'probs' not found" on the
# first iteration; filling by index also avoids regrowing the vectors.
probs <- numeric(n_iterations)
accs <- numeric(n_iterations)

for (iter in seq_len(n_iterations)) {
  # Sample with replacement here - to understand why please refer to the book.
  sampled_order_ids <- unique(sample(n_orders, replace = TRUE))

  # drop = FALSE keeps both sets data frames even if `d` has a single column,
  # so nrow() below stays meaningful.
  training_set <- d[sampled_order_ids, , drop = FALSE]
  test_set <- d[-sampled_order_ids, , drop = FALSE]

  probs[iter] <- nrow(training_set) / n_orders

  # TODO: train the model here with `training_set`. Be sure to always train
  # the same model, and not discard it and continuously start at 0.
  # TODO: test the model with `test_set` and store the result as a single
  # numeric value in `accuracy`; until that code is added, the next line
  # fails because `accuracy` is undefined.
  accs[iter] <- accuracy
}

# Plot accuracies to see how they change with a higher number of iterations.
plot(x = seq_along(accs), y = accs, type = "p")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment