-
-
Save bertomartin/c4434610cd5ff8936d658726bd61982f to your computer and use it in GitHub Desktop.
Simple logistic model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Billing-related vocabulary: the per-thread count of each of these words
# is used as one feature column of the classifier.
selected_words = [
    'receipt',
    'card',
    'refund',
    'month',
    'monthly',
    'plan',
    'profit',
    'charged',
    'charge',
    'digits',
    'visa',
    'money',
    'upgrade',
    'upgraded',
    'pay',
    'bill',
    'billing',
    'billed',
    'cancel',
    'cancelled',
    'accounts',
    'credit',
    'year',
    'bank',
    'discount',
    'small',
]
import re

# Strip HTML tags from each thread body.
threads['words'] = threads['body'].apply(
    lambda body: re.sub(r"<[^>]*>", "", body))
# Lowercase and trim the text, then replace every non-word character and
# every digit with a space. Raw strings keep the backslashes for the regex
# engine instead of relying on invalid string escapes.
threads['words'] = threads['words'].apply(
    lambda words: re.sub(r"[\W\d]", " ", words.lower().strip()))
threads_tfidf = graphlab.text_analytics.tf_idf(threads['words'])
# Join the ten highest-scoring TF-IDF terms of each thread into one string.
# The slice starts at 0: the previous [1:10] skipped the top-scoring term
# and kept at most nine words.
threads['top10'] = threads_tfidf['docs'].apply(
    lambda t: " ".join(sorted(t, key=t.get, reverse=True)[:10]))
threads['word_count'] = graphlab.text_analytics.count_words(threads['words'])
def selected_word_count(word, counts):
    """Return the count recorded for ``word`` in ``counts``.

    ``counts`` is a mapping from word to number of occurrences. A word
    that is not present in the mapping counts as 0.
    """
    return counts.get(word, 0)
# NOTE(review): `results` is not used in the visible code; kept in case
# later code relies on it.
results = {}
# Add one column per selected word holding that word's count in each thread.
for selected_word in selected_words:
    # Bind the current word as a default argument so every lambda keeps its
    # own word even if it is evaluated after the loop variable has moved on.
    threads[selected_word] = threads['word_count'].apply(
        lambda counts, word=selected_word: selected_word_count(word, counts))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a logistic classifier that predicts the 'area_billing' column from the
# selected-word count columns.
billing_model = graphlab.logistic_classifier.create(
    train_data,
    target='area_billing',
    features=selected_words,
    validation_set=test_data,
)
# NOTE(review): validation uses `test_data` while evaluation uses
# `test_threads` -- confirm that both names are intended.
billing_model.evaluate(test_threads)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment