Skip to content

Instantly share code, notes, and snippets.

@cxtadment
Last active April 18, 2016 17:55
Show Gist options
  • Save cxtadment/2e364d957f5fd586f9c6b3f9931ef7c3 to your computer and use it in GitHub Desktop.
Save cxtadment/2e364d957f5fd586f9c6b3f9931ef7c3 to your computer and use it in GitHub Desktop.
def pickle_words_features(microblogType):
    """Pickle the distinct words of all microblogs of one type.

    Queries ``Microblog.objects`` filtered by ``microblogType``, gathers the
    ``words`` of every returned microblog, and writes the list of distinct
    words to ``WORDS_FEATURES_PATH`` with ``pickle``.

    Args:
        microblogType: Value passed as the ``microblogType`` filter to
            ``Microblog.objects``.

    Returns:
        None. The result is the file written at ``WORDS_FEATURES_PATH``.
    """
    microblogs = Microblog.objects(microblogType=microblogType)

    # Feed the words to FreqDist lazily; only its keys (the distinct words)
    # are needed, so there is no reason to materialise the full word list.
    word_freq = nltk.FreqDist(
        word for microblog in microblogs for word in microblog.words
    )
    words_features = list(word_freq.keys())

    # Binary mode is required for pickle output.
    with open(WORDS_FEATURES_PATH, 'wb') as output_file:
        pickle.dump(words_features, output_file)
def feature_filter(document, words_features):
    """Build a word-presence feature dict for a single document.

    Args:
        document: Iterable of hashable tokens making up the document.
        words_features: Iterable of candidate feature words.

    Returns:
        A dict mapping every word in ``words_features`` to ``True`` when the
        word occurs in ``document`` and ``False`` otherwise.
    """
    # Build the set once so each membership test below is a hash lookup.
    present = set(document)
    return {word: word in present for word in words_features}
def get_feature_set(microblogType):
    """Return labelled feature dicts for all microblogs of one type.

    Args:
        microblogType: Value passed as the ``microblogType`` filter to
            ``Microblog.objects``.

    Returns:
        A list of ``(features, polarity)`` tuples, one per microblog, where
        ``features`` is the dict produced by ``feature_filter`` for the
        microblog's ``words`` and ``polarity`` is the microblog's
        ``polarity`` attribute.
    """
    microblogs = Microblog.objects(microblogType=microblogType)
    # NOTE(review): presumably this loads the word list written by
    # pickle_words_features -- confirm against get_words_features_pickle.
    words_features = get_words_features_pickle()
    return [
        (feature_filter(microblog.words, words_features), microblog.polarity)
        for microblog in microblogs
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment