Skip to content

Instantly share code, notes, and snippets.

@lievcin
Created February 6, 2018 11:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lievcin/cd18d636d1cce6be40b413647e834cfa to your computer and use it in GitHub Desktop.
Save lievcin/cd18d636d1cce6be40b413647e834cfa to your computer and use it in GitHub Desktop.
def crossValidate(dataset, folds):
    """Run k-fold cross-validation and collect per-fold labels and predictions.

    ``dataset`` is shuffled in place, each sample's raw text is pre-processed
    and turned into a feature vector exactly once, and the data is then split
    into ``folds`` contiguous folds whose sizes differ by at most one. Each
    fold in turn is held out while a classifier is trained on the others.

    Args:
        dataset: Mutable list of 3-item samples. Item 1 is passed through
            ``preProcess`` and ``toFeatureVector``; item 2 is used as the
            label; item 0 is carried along untouched.
        folds: Number of folds; must be between 1 and ``len(dataset)``.

    Returns:
        A ``(ground_truth, predictions)`` tuple. Both are lists with one
        entry per fold: the labels of the held-out samples, and whatever
        ``predictLabels`` returned for those samples.

    Raises:
        ValueError: If ``folds`` is smaller than 1 or larger than the dataset.
    """
    fold_count = int(folds)
    sample_count = len(dataset)
    if fold_count < 1 or fold_count > sample_count:
        raise ValueError(
            "folds must be between 1 and len(dataset) (%d), got %r"
            % (sample_count, folds)
        )

    shuffle(dataset)

    # Pre-process and tokenize once, not once per fold.
    dataset = [(t[0], toFeatureVector(preProcess(t[1])), t[2]) for t in dataset]

    # Integer boundaries give exactly `fold_count` folds even when the dataset
    # size is not a multiple of it; a fixed step of len // folds would leave a
    # small extra fold made of the remainder.
    bounds = [k * sample_count // fold_count for k in range(fold_count + 1)]

    predictions = []
    ground_truth = []
    for start, stop in zip(bounds, bounds[1:]):
        validationFold = dataset[start:stop]
        trainFolds = dataset[:start] + dataset[stop:]

        training_set = [(t[1], t[2]) for t in trainFolds]
        classifier = trainClassifier(training_set)

        # predictLabels reads the feature vector from index 1 of each sample,
        # so the held-out fold is passed as-is.
        predictions.append(predictLabels(validationFold, classifier))
        ground_truth.append([t[2] for t in validationFold])
    return ground_truth, predictions
def predictLabels(reviewSamples, classifier):
    """Classify a batch of samples and return the classifier's output.

    Args:
        reviewSamples: Iterable of indexable samples whose item at index 1
            is the feature vector to classify.
        classifier: Object exposing ``classify_many(featuresets)``.

    Returns:
        Whatever ``classifier.classify_many`` returns for the feature vectors,
        in the same order as ``reviewSamples``.
    """
    # Pass a concrete list rather than a lazy map object, so classifiers that
    # need len() or more than one pass over the batch also work on Python 3.
    feature_vectors = [sample[1] for sample in reviewSamples]
    return classifier.classify_many(feature_vectors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment