This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models import word2vec | |
bigram_to_search = "hello_there"


def bigram2vec(unigrams, bigram_to_search):
    """Return the word2vec embedding of a bigram learned from ``unigrams``.

    A ``Phrases`` model is fitted on ``unigrams`` to detect bigrams, the
    corpus is re-tokenised through it, and a ``Word2Vec`` model is trained
    on the result.

    Args:
        unigrams: Tokenised sentences. The corpus is iterated more than
            once (by ``Phrases`` and again by ``Word2Vec``), so it must be
            re-iterable, e.g. a list of lists rather than a generator.
        bigram_to_search: Bigram token to look up, e.g. ``"hello_there"``.

    Returns:
        The embedding vector of ``bigram_to_search``, or ``None`` when the
        token is not in the trained model's vocabulary.
    """
    # Imported here because only ``word2vec`` is imported at module level.
    from gensim.models import Phrases

    bigrams = Phrases(unigrams)
    model = word2vec.Word2Vec(bigrams[unigrams])
    # Membership tests and vector lookups go through ``model.wv``; the
    # ``model.vocab`` / ``model[...]`` shortcuts were removed in gensim 4.
    if bigram_to_search in model.wv:
        return model.wv[bigram_to_search]
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from nltk.parse.stanford import StanfordParser, StanfordDependencyParser | |
# Locations of the Stanford tools that the NLTK wrappers below look up through
# the CLASSPATH and STANFORD_MODELS environment variables.
# Every entry must be an absolute path: an entry without the leading "/" is
# resolved against the current working directory and is silently not found.
os.environ["CLASSPATH"] = os.pathsep.join([
    "/usr/local/stanford-models/stanford-postagger-full-2016-10-31/",
    "/usr/local/stanford-models/stanford-ner-2016-10-31/",
    "/usr/local/stanford-models/stanford-parser-full-2016-10-31/",
])
os.environ["STANFORD_MODELS"] = os.pathsep.join([
    "/usr/local/stanford-models/stanford-postagger-full-2016-10-31/models",
    "/usr/local/stanford-models/stanford-ner-2016-10-31/classifiers",
])

# The parsers are created after the environment is set up.
# NOTE(review): newer NLTK releases steer users to nltk.parse.corenlp instead
# of these wrappers -- confirm against the installed NLTK version.
stan_parser = StanfordParser()
stan_dep_parser = StanfordDependencyParser()

# Example sentences to parse.
sents = ["The Mavericks won against the Jets", "Golden State Warriors thrashed LA Lakers"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.corpus import twitter_samples | |
from nltk.twitter.util import json2csv | |
# List the files in the twitter_samples corpus (the return value is not kept,
# so it is only visible in an interactive session).
twitter_samples.fileids()

# Read the tweet texts stored in 'negative_tweets.json' and print the first 15.
strings = twitter_samples.strings('negative_tweets.json')
preview = strings[:15]
for string in preview:
    print(string)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
These functions are taken from the scikit-learn (sklearn) package.
""" | |
def strip_accents_unicode(s): | |
"""Transform accentuated unicode symbols into their simple counterpart | |
Warning: the python-level loop and join operations make this | |
implementation 20 times slower than the strip_accents_ascii basic | |
normalization. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write df_test to sheet 'Sheet1' of 'filename.xlsx' using the XlsxWriter engine.
# The context manager saves and closes the workbook on exit, even if
# to_excel raises; ExcelWriter.save() was deprecated in pandas 1.5 and
# removed in pandas 2.0.
with pd.ExcelWriter('filename.xlsx', engine='xlsxwriter') as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    df_test.to_excel(writer, sheet_name='Sheet1')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def deaccent(text):
    """
    Remove accentuation from the given string.

    Input text is either a unicode string or a utf8 encoded bytestring.
    Return the input string with accents removed, as unicode.

    >>> deaccent("Šéf chomutovských komunistů dostal poštou bílý prášek")
    'Sef chomutovskych komunistu dostal postou bily prasek'
    """
    import unicodedata

    if isinstance(text, bytes):
        text = text.decode("utf8")
    # NFD splits each accented character into its base character followed by
    # combining marks (category "Mn"); dropping the marks removes the accents.
    decomposed = unicodedata.normalize("NFD", text)
    stripped = "".join(
        ch for ch in decomposed if unicodedata.category(ch) != "Mn"
    )
    # Recompose whatever is left so the result is in NFC form.
    return unicodedata.normalize("NFC", stripped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the pre-trained GloVe vectors into a {word: vector} lookup.
# Download them from http://nlp.stanford.edu/data/glove.6B.zip
import os

embeddings_index = {}
GLOVE_DIR = 'glove.6B'
glove_path = os.path.join(GLOVE_DIR, 'glove.6B.%id.txt' % EMBEDDING_DIM)
# The with-block closes the file even if parsing fails. The encoding is given
# explicitly so the result does not depend on the platform default.
with open(glove_path, encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        # Each line is the token followed by its vector components.
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with tf.name_scope("train_op"):
    trainables = tf.trainable_variables()
    # Debug output: list every trainable variable with its shape and the
    # gradient of the loss with respect to it. Each message is formatted into
    # a single string so the output is the same whether print is a statement
    # (Python 2) or a function (Python 3).
    print("trainables %s" % (trainables,))
    for train_obj in trainables:
        print("%s %s" % (train_obj.name, train_obj.get_shape()))
        # NOTE: this builds a separate gradient computation per variable,
        # used only for the debug print; the gradients used afterwards are
        # the ones computed once for all variables below.
        print("grad %s" % (tf.gradients(self.loss, [train_obj]),))
    grads = tf.gradients(self.loss, trainables)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
def gaussian_noise_layer(input_layer, std):
    """Return ``input_layer`` with additive zero-mean Gaussian noise.

    Args:
        input_layer: Tensor to perturb; the noise is sampled with the same
            (dynamic) shape.
        std: Standard deviation of the noise.

    Returns:
        The sum of ``input_layer`` and the sampled float32 noise tensor.
    """
    noise_shape = tf.shape(input_layer)
    gaussian = tf.random_normal(
        noise_shape,
        mean=0.0,
        stddev=std,
        dtype=tf.float32,
    )
    return input_layer + gaussian
# Float32 placeholder named 'input': 8 values per row, with the first
# dimension left unspecified (None).
inp = tf.placeholder(tf.float32, shape=[None, 8], name='input')