Created
November 3, 2015 00:07
-
-
Save michael-erasmus/43b451166d9c11ea134e to your computer and use it in GitHub Desktop.
Simple logistic model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Billing-related vocabulary. Each word becomes a per-thread count column on
# `threads`, and the whole list is passed as the classifier's feature set.
selected_words = [
    'receipt',
    'card',
    'refund',
    'month',
    'monthly',
    'plan',
    'profit',
    'charged',
    'charge',
    'digits',
    'visa',
    'money',
    'upgrade',
    'upgraded',
    'pay',
    'bill',
    'billing',
    'billed',
    'cancel',
    'cancelled',
    'accounts',
    'credit',
    'year',
    'bank',
    'discount',
    'small',
]
import re

# Strip HTML tags from each thread body.
# NOTE(review): tags are replaced with the empty string, so text separated
# only by a tag (e.g. "foo<br>bar") is fused into a single token -- confirm
# that this is intended.
threads['words'] = threads['body'].apply(
    lambda body: re.sub(r"<[^>]*>", "", body))

# Lowercase and trim the text, then replace every non-word character and
# every digit with a space. Raw strings keep `\W` / `\d` as regex escapes
# rather than (invalid) string escapes.
threads['words'] = threads['words'].apply(
    lambda words: re.sub(r"[\W\d]", " ", words.lower().strip()))

# TF-IDF scores for the cleaned text of each thread.
threads_tfidf = graphlab.text_analytics.tf_idf(threads['words'])

# Join the ten highest-scoring terms of each thread into one string. Each
# 'docs' entry is used as a term -> score mapping (iterated and read via
# `.get`). The slice previously read [1:10], which discarded the top term
# and kept only nine.
threads['top10'] = threads_tfidf['docs'].apply(
    lambda t: " ".join(sorted(t, key=t.get, reverse=True)[:10]))

# Per-thread word counts, consumed by the selected-word feature columns.
threads['word_count'] = graphlab.text_analytics.count_words(threads['words'])
def selected_word_count(word, counts):
    """Return how many times *word* occurs according to *counts*.

    Args:
        word: The word to look up.
        counts: A mapping of word -> occurrence count.

    Returns:
        The count stored for *word*, or 0 when *word* is not a key of
        *counts*.
    """
    return counts.get(word, 0)
# NOTE(review): `results` is not used anywhere in the visible code.
results = {}

# Add one column per selected word holding that word's count in each thread.
for selected_word in selected_words:
    # Bind the current word as a default argument so the lambda keeps this
    # iteration's value even if it is evaluated after the loop has advanced.
    threads[selected_word] = threads['word_count'].apply(
        lambda words, word=selected_word: selected_word_count(word, words))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a logistic classifier that predicts the 'area_billing' column from the
# selected-word count columns.
billing_model = graphlab.logistic_classifier.create(
    train_data,
    target='area_billing',
    features=selected_words,
    validation_set=test_data)

# NOTE(review): evaluation runs on `test_threads`, while the validation set
# above is `test_data`; neither name is defined in the visible code --
# confirm which dataset is intended.
billing_model.evaluate(test_threads)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment