Skip to content

Instantly share code, notes, and snippets.

@glortho
Created May 14, 2015 20:30
Show Gist options
  • Save glortho/200d6c05ffadf6e893e9 to your computer and use it in GitHub Desktop.
Save glortho/200d6c05ffadf6e893e9 to your computer and use it in GitHub Desktop.
Predict if a tweet belongs to a category (based on its text) using a pre-built [Support Vector Machine](http://en.wikipedia.org/wiki/Support_vector_machine)
#!/bin/sh
"exec" "twxec" "-e" "predict_tweet" "$0" "$@"
{{docstring "Predict if a tweet belongs to a category (based on its text) using a pre-built [Support Vector Machine](http://en.wikipedia.org/wiki/Support_vector_machine)"}}
from trickle.nner.en.nner import tokens as tokenize
import numpy as np
import csv
import importlib
import pickle
with open({{string Model_File}}) as f:
model_data = pickle.load(f)
vocab_coordspace = model_data["vocab_coordspace"]
vocab_set = set(vocab_coordspace.keys())
split_modules = model_data["feature_generator"].split(".")
tokenization_module = importlib.import_module(".".join(split_modules[:-1]))
tokenize = getattr(tokenization_module, split_modules[-1])
class_label = {{string Classification_Label}}
classifier = model_data["classifier"]
def token_vector(doc):
    """Return a 0/1 vector marking which vocabulary tokens occur in *doc*.

    The vector has one slot per entry in ``vocab_set``.  Tokens produced by
    ``tokenize`` that are not in the vocabulary are ignored; each known token
    sets the slot given by ``vocab_coordspace[token]`` to 1.
    """
    known_tokens = vocab_set.intersection(tokenize(doc))
    vector = np.zeros([len(vocab_set)])
    hit_positions = [vocab_coordspace[tok] for tok in known_tokens]
    vector[hit_positions] = 1
    return vector
def predict_tweet(msg):
    """Annotate *msg* with the classifier's verdict for its ``"text"`` field.

    On success ``msg["classifications"][class_label]`` is set to ``True``
    when the classifier predicts 1 and ``False`` otherwise; any entries
    already present under ``"classifications"`` are kept.

    Classification is best-effort: if anything goes wrong (missing
    ``"text"``, tokenizer or classifier error, ...) the message is passed
    through with ``"classifications"`` guaranteed to exist but without an
    entry for ``class_label``.

    Returns the same *msg* object, mutated in place.
    """
    try:
        tvec = token_vector(msg["text"])
        classifications = msg.get("classifications", {})
        # .item() is what np.asscalar called internally; np.asscalar itself
        # was deprecated in NumPy 1.16 and removed in 1.23.
        # NOTE(review): tvec is passed as a 1-D array; if the classifier
        # requires 2-D input this raises and the error is swallowed below.
        classifications[class_label] = classifier.predict(tvec).item() == 1
        msg["classifications"] = classifications
    except Exception:
        # Deliberately swallow ordinary errors so one bad message does not
        # stop processing.  `except Exception` (rather than a bare
        # `except:`) lets KeyboardInterrupt and SystemExit propagate.
        msg["classifications"] = msg.get("classifications", {})
    return msg
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment