Skip to content

Instantly share code, notes, and snippets.

@vijayanandrp
Created November 30, 2017 14:44
Show Gist options
  • Save vijayanandrp/1cc8faf76aa5e2aac30b5054a2ebd56d to your computer and use it in GitHub Desktop.
Save vijayanandrp/1cc8faf76aa5e2aac30b5054a2ebd56d to your computer and use it in GitHub Desktop.
def train_and_test(train_percent=0.80):
    """Train a Naive Bayes classifier on a random split and report on it.

    The feature set returned by ``prepare_data_set()`` is passed to
    ``validate_data_set()``, shuffled in place, and cut into a leading
    training portion and a trailing test portion. The accuracy measured on
    the test portion and the most informative features (``n=5`` requested)
    are printed to stdout.

    Args:
        train_percent: Fraction of the shuffled data used for training. The
            split index is ``int(len(data) * train_percent)``, i.e. truncated
            toward zero. NOTE(review): the value is not range-checked here;
            presumably it is expected to lie in (0, 1) -- confirm with
            callers.

    Returns:
        The classifier object produced by ``NaiveBayesClassifier.train``.
    """
    samples = prepare_data_set()
    validate_data_set(samples)
    random.shuffle(samples)

    # Everything before the split index trains the model; the rest is held
    # out for the accuracy measurement.
    split_at = int(len(samples) * train_percent)
    training, held_out = samples[:split_at], samples[split_at:]

    model = NaiveBayesClassifier.train(training)

    score = classify.accuracy(model, held_out)
    print('{} Accuracy- {}'.format('Naive Bayes', score))

    print('Most informative features')
    for entry in model.most_informative_features(n=5):
        # Each entry is unpacked positionally into the two placeholders.
        print("\t {} = {} ".format(*entry))
    return model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment