Skip to content

Instantly share code, notes, and snippets.

View amn41's full-sized avatar

Alan Nichol amn41

View GitHub Profile
import numpy as np
def sum_vecs(embed, text):
    """Return the sum of the embedding vectors of the known words in *text*.

    The text is split on single spaces. Every token found in
    ``embed.vocab`` contributes its row of ``embed.W``; tokens that are not
    in the vocabulary are ignored.

    Args:
        embed: Object exposing ``W`` (a 2-D array with one row per word) and
            ``vocab`` (a mapping from word to row index in ``W``).
        text: The string to embed.

    Returns:
        A 1-D array of length ``embed.W.shape[1]``. It is all zeros when no
        token of *text* is in the vocabulary.
    """
    vec = np.zeros(embed.W.shape[1])
    for term in text.split(' '):
        if term in embed.vocab:
            vec = vec + embed.W[embed.vocab[term], :]
    # Hand the accumulated vector back; without this the result is discarded.
    return vec
import numpy as np
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import pickle
\emph{hello}
import sys, os
from mitie import *
# Build a NER training instance from an already-tokenized sentence.
sample = ner_training_instance(["I", "am", "looking", "for", "some", "cheap", "Mexican", "food", "."])
# Label token 5 ("cheap") as a price range and token 6 ("Mexican") as a cuisine.
# presumably add_entity takes a half-open range of token indices - verify against the MITIE API.
# NOTE(review): xrange exists only in Python 2; this raises NameError on Python 3.
sample.add_entity(xrange(5,6), "pricerange")
sample.add_entity(xrange(6,7), "cuisine")
# And we add another training example
sample2 = ner_training_instance(["show", "me", "indian", "restaurants", "in", "the", "centre", "."])
# Token 2 ("indian") is the cuisine mention in the second sentence.
sample2.add_entity(xrange(2,3), "cuisine")
import sys, os
from mitie import *
# Create a text-categorizer trainer from a word-feature-extractor file (placeholder path).
trainer = text_categorizer_trainer("/path/to/total_word_feature_extractor.dat")
# NOTE(review): `data` is assigned here, but the loops below read `training_examples`,
# which is not defined in this excerpt - confirm which name is intended.
data = {} # same as before - omitted for brevity
# Walk every label, then every example text stored under that label's "examples" key.
# NOTE(review): leading indentation appears to have been stripped from this excerpt.
for label in training_examples.keys():
for text in training_examples[label]["examples"]:
tokens = tokenize(text)
# NOTE(review): the excerpt ends here; `tokens` and `trainer` are not used in the visible lines.
# Placeholder paths to the vocabulary and vectors files passed to Embedding.
vocab_file ="/path/to/vocab_file"
vectors_file ="/path/to/vectors_file"
# `Embedding` is not defined in this fragment; it must already be in scope.
embed = Embedding(vocab_file,vectors_file)
# Reference terms for the "cuisine" concept.
cuisine_refs = ["mexican","chinese","french","british","american"]
# presumably a similarity cut-off handed to find_similar_words - verify against the caller.
threshold = 0.2
# Example input sentence.
text = "I want to find an indian restaurant"
# Collects the embedding vectors of the reference terms `refs` into a matrix and
# splits `text` on single spaces.
# NOTE(review): leading indentation appears to have been stripped from this excerpt.
def find_similar_words(embed,text,refs,thresh):
# One row per reference term, each as wide as a row of embed.W.
C = np.zeros((len(refs),embed.W.shape[1]))
for idx, term in enumerate(refs):
# Rows for terms missing from embed.vocab are left as all zeros.
if term in embed.vocab:
C[idx,:] = embed.W[embed.vocab[term], :]
tokens = text.split(' ')
# NOTE(review): the excerpt ends here; `thresh`, `tokens` and `C` are not used in the
# visible lines and nothing is returned.
@amn41
amn41 / embedding.py
Last active October 17, 2017 01:19
# Reads a vocabulary file and a word-vector file, both space-separated text.
# NOTE(review): leading indentation appears to have been stripped, and the excerpt ends
# inside __init__; nothing is stored on `self` in the visible lines.
class Embedding(object):
def __init__(self,vocab_file,vectors_file):
# The first space-separated field of each vocabulary line is taken as the word.
with open(vocab_file, 'r') as f:
words = [x.rstrip().split(' ')[0] for x in f.readlines()]
with open(vectors_file, 'r') as f:
# Maps the first field of each line to the remaining fields parsed as floats.
vectors = {}
for line in f:
vals = line.rstrip().split(' ')
vectors[vals[0]] = [float(x) for x in vals[1:]]
# Form-filling loop: repeat until formData.is_complete() reports true.
# `formData`, `questions` and `ask` are not defined in this excerpt.
while not formData.is_complete():
    # Fetch the key of the next field to fill (presumably the first unanswered
    # one) and ask the question registered under that key.
    questionKey = formData.first_missing_field()
    ask(questions[questionKey])
# I like using seaborn, but of course you can also just use this as a set of colours.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# from seaborn docs
def sinplot(flip=1):
x = np.linspace(0, 14, 100)
for i in range(1, 7):