Roopal Garg roopalgarg

## gensim_bigram_word_emb.txt
from gensim.models import word2vec

bigram_to_search = "hello_there"


def bigram2vec(unigrams, bigram_to_search):
    """Train word2vec on bigram-merged sentences and look up one bigram.

    A gensim ``Phrases`` model is fitted on ``unigrams``, the corpus is
    re-tokenised through it, and a ``Word2Vec`` model is trained on the
    result with gensim's default settings.

    Args:
        unigrams: Corpus handed to ``Phrases`` and ``Word2Vec``; per the
            gensim API this is expected to be an iterable of tokenised
            sentences (lists of string tokens).
        bigram_to_search: Token to look up, e.g. ``"hello_there"``.

    Returns:
        The embedding vector for ``bigram_to_search`` if it made it into the
        trained vocabulary, otherwise ``None``.
    """
    # Imported locally because the file only imports ``word2vec``.
    from gensim.models import Phrases

    bigrams = Phrases(unigrams)
    model = word2vec.Word2Vec(bigrams[unigrams])

    # Newer gensim keeps the vocabulary and vectors on ``model.wv``; very old
    # releases exposed membership and indexing on the model itself.
    vectors = getattr(model, "wv", model)
    if bigram_to_search in vectors:
        return vectors[bigram_to_search]
    return None

## nltk_stanford_nlp.py
import os
from nltk.parse.stanford import StanfordParser, StanfordDependencyParser

# Directories holding the Stanford tool distributions. Every entry must be an
# absolute path: the NER entry previously lacked its leading "/" and was
# therefore resolved relative to the current working directory.
os.environ["CLASSPATH"] = ":".join([
    "/usr/local/stanford-models/stanford-postagger-full-2016-10-31/",
    "/usr/local/stanford-models/stanford-ner-2016-10-31/",
    "/usr/local/stanford-models/stanford-parser-full-2016-10-31/",
])
# Directories holding the tagger models and NER classifiers.
os.environ["STANFORD_MODELS"] = ":".join([
    "/usr/local/stanford-models/stanford-postagger-full-2016-10-31/models",
    "/usr/local/stanford-models/stanford-ner-2016-10-31/classifiers",
])

# The parsers are created after the environment variables are set.
stan_parser = StanfordParser()
stan_dep_parser = StanfordDependencyParser()

# Example sentences.
sents = ["The Mavericks won against the Jets", "Golden State Warriors thrashed LA Lakers"]

## nltk twitter corpus
from nltk.corpus import twitter_samples
from nltk.twitter.util import json2csv

# Query the file ids of the corpus (the return value is not kept).
twitter_samples.fileids()

# Read the strings of 'negative_tweets.json' and print the first 15 of them.
strings = twitter_samples.strings('negative_tweets.json')
for string in strings[0:15]:
    print(string)

## strip_accent.py
"""
these are functions from within the sklearn module
"""

def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    Args:
        s: The unicode string to strip.

    Returns:
        ``s`` after NFKD normalisation with every combining character
        removed.
    """
    # Imported locally so the function works without a file-level import.
    import unicodedata

    # NFKD splits an accented character into its base character followed by
    # combining marks; dropping the marks leaves the base characters.
    normalized = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in normalized if not unicodedata.combining(ch))

## pandas_df_to_excel
# Write df_test to 'filename.xlsx' using the XlsxWriter engine.
# The context manager saves and closes the workbook on exit, even if
# to_excel raises; the explicit writer.save() call was removed in pandas 2.0.
with pd.ExcelWriter('filename.xlsx', engine='xlsxwriter') as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    df_test.to_excel(writer, sheet_name='Sheet1')

## deaccent
def deaccent(text):
    """
    Remove accentuation from the given string. Input text is either a unicode string or utf8 encoded bytestring.

    Return input string with accents removed, as unicode.

    >>> deaccent("Šéf chomutovských komunistů dostal poštou bílý prášek")
    u'Sef chomutovskych komunistu dostal postou bily prasek'

    """
    # Imported locally so the function works without a file-level import.
    import unicodedata

    # Byte strings are documented to be utf8 encoded; decode them first.
    if isinstance(text, bytes):
        text = text.decode("utf8")

    # NFD splits each accented character into a base character plus
    # non-spacing marks (category "Mn"); dropping the marks removes accents.
    decomposed = unicodedata.normalize("NFD", text)
    stripped = "".join(
        ch for ch in decomposed if unicodedata.category(ch) != "Mn"
    )
    # Recompose whatever is left into the canonical composed form.
    return unicodedata.normalize("NFC", stripped)

## glove_load_vectors.py
#load glove vectors
#download them from http://nlp.stanford.edu/data/glove.6B.zip
embeddings_index = {}
GLOVE_DIR = 'glove.6B'
import os
# Each line is parsed as "<word> <coef> <coef> ...". EMBEDDING_DIM selects
# which file to read and must be defined before this snippet runs.
# The with-block closes the file even if parsing fails part-way through.
with open(os.path.join(GLOVE_DIR, 'glove.6B.%id.txt' % EMBEDDING_DIM)) as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        # Store the vector; previously it was parsed and then discarded.
        embeddings_index[word] = coefs

## tensorflow-debug-gradients.py
# Debug helper: print every trainable variable together with the gradient of
# self.loss with respect to it, then build the gradients for all of them.
# NOTE(review): this is a fragment; `self` must come from an enclosing method
# that is not shown here. It uses Python 2 print statements.
with tf.name_scope("train_op"):
	trainables = tf.trainable_variables()

	print "trainables", trainables
	for train_obj in trainables:
		# Name and static shape of the variable.
		print train_obj.name, train_obj.get_shape()
		# Gradient of the loss w.r.t. this single variable.
		print "grad", tf.gradients(self.loss, [train_obj])

	# Gradients of the loss w.r.t. all trainable variables at once.
	grads = tf.gradients(self.loss, trainables)

## tensorflow-add-noise.py
import tensorflow as tf
import numpy as np


def gaussian_noise_layer(input_layer, std):
    """Return ``input_layer`` plus zero-mean Gaussian noise.

    The noise is a float32 tensor sampled with the runtime shape of
    ``input_layer`` and a standard deviation of ``std``.
    """
    return input_layer + tf.random_normal(
        shape=tf.shape(input_layer),
        mean=0.0,
        stddev=std,
        dtype=tf.float32,
    )


# float32 input placeholder with shape [None, 8].
inp = tf.placeholder(dtype=tf.float32, shape=[None, 8], name='input')
	from gensim.models import word2vec

	bigram_to_search = "hello_there"


	def bigram2vec(unigrams, bigram_to_search):
		"""Train word2vec on bigram-merged sentences and look up one bigram.

		A gensim ``Phrases`` model is fitted on ``unigrams``, the corpus is
		re-tokenised through it, and a ``Word2Vec`` model is trained on the
		result with gensim's default settings.

		Args:
			unigrams: Corpus handed to ``Phrases`` and ``Word2Vec``; per the
				gensim API this is expected to be an iterable of tokenised
				sentences (lists of string tokens).
			bigram_to_search: Token to look up, e.g. ``"hello_there"``.

		Returns:
			The embedding vector for ``bigram_to_search`` if it made it into
			the trained vocabulary, otherwise ``None``.
		"""
		# Imported locally because the file only imports ``word2vec``.
		from gensim.models import Phrases

		bigrams = Phrases(unigrams)
		model = word2vec.Word2Vec(bigrams[unigrams])

		# Newer gensim keeps the vocabulary and vectors on ``model.wv``; very
		# old releases exposed membership and indexing on the model itself.
		vectors = getattr(model, "wv", model)
		if bigram_to_search in vectors:
			return vectors[bigram_to_search]
		return None
	import os
	from nltk.parse.stanford import StanfordParser, StanfordDependencyParser

	# Directories holding the Stanford tool distributions. Every entry must be
	# an absolute path: the NER entry previously lacked its leading "/" and
	# was therefore resolved relative to the current working directory.
	os.environ["CLASSPATH"] = ":".join([
		"/usr/local/stanford-models/stanford-postagger-full-2016-10-31/",
		"/usr/local/stanford-models/stanford-ner-2016-10-31/",
		"/usr/local/stanford-models/stanford-parser-full-2016-10-31/",
	])
	# Directories holding the tagger models and NER classifiers.
	os.environ["STANFORD_MODELS"] = ":".join([
		"/usr/local/stanford-models/stanford-postagger-full-2016-10-31/models",
		"/usr/local/stanford-models/stanford-ner-2016-10-31/classifiers",
	])

	# The parsers are created after the environment variables are set.
	stan_parser = StanfordParser()
	stan_dep_parser = StanfordDependencyParser()

	# Example sentences.
	sents = ["The Mavericks won against the Jets", "Golden State Warriors thrashed LA Lakers"]
	from nltk.corpus import twitter_samples
	from nltk.twitter.util import json2csv

	# Query the file ids of the corpus (the return value is not kept).
	twitter_samples.fileids()

	# Read the strings of 'negative_tweets.json' and print the first 15.
	strings = twitter_samples.strings('negative_tweets.json')
	for string in strings[0:15]:
		print(string)
	"""
	these are functions from within the sklearn module
	"""

	def strip_accents_unicode(s):
		"""Transform accentuated unicode symbols into their simple counterpart

		Warning: the python-level loop and join operations make this
		implementation 20 times slower than the strip_accents_ascii basic
		normalization.

		Args:
			s: The unicode string to strip.

		Returns:
			``s`` after NFKD normalisation with every combining character
			removed.
		"""
		# Imported locally so the function works without a file-level import.
		import unicodedata

		# NFKD splits an accented character into its base character followed
		# by combining marks; dropping the marks leaves the base characters.
		normalized = unicodedata.normalize("NFKD", s)
		return "".join(ch for ch in normalized if not unicodedata.combining(ch))
	# Write df_test to 'filename.xlsx' using the XlsxWriter engine.
	# The context manager saves and closes the workbook on exit, even if
	# to_excel raises; the explicit writer.save() call was removed in
	# pandas 2.0.
	with pd.ExcelWriter('filename.xlsx', engine='xlsxwriter') as writer:
		# Convert the dataframe to an XlsxWriter Excel object.
		df_test.to_excel(writer, sheet_name='Sheet1')
	def deaccent(text):
		"""
		Remove accentuation from the given string. Input text is either a unicode string or utf8 encoded bytestring.

		Return input string with accents removed, as unicode.

		>>> deaccent("Šéf chomutovských komunistů dostal poštou bílý prášek")
		u'Sef chomutovskych komunistu dostal postou bily prasek'

		"""
		# Imported locally so the function works without a file-level import.
		import unicodedata

		# Byte strings are documented to be utf8 encoded; decode them first.
		if isinstance(text, bytes):
			text = text.decode("utf8")

		# NFD splits each accented character into a base character plus
		# non-spacing marks (category "Mn"); dropping the marks removes accents.
		decomposed = unicodedata.normalize("NFD", text)
		stripped = "".join(
			ch for ch in decomposed if unicodedata.category(ch) != "Mn"
		)
		# Recompose whatever is left into the canonical composed form.
		return unicodedata.normalize("NFC", stripped)
	#load glove vectors
	#download them from http://nlp.stanford.edu/data/glove.6B.zip
	embeddings_index = {}
	GLOVE_DIR = 'glove.6B'
	import os
	# Each line is parsed as "<word> <coef> <coef> ...". EMBEDDING_DIM selects
	# which file to read and must be defined before this snippet runs.
	# The with-block closes the file even if parsing fails part-way through.
	with open(os.path.join(GLOVE_DIR, 'glove.6B.%id.txt' % EMBEDDING_DIM)) as f:
		for line in f:
			values = line.split()
			if not values:
				# Skip blank lines instead of failing on values[0].
				continue
			word = values[0]
			coefs = np.asarray(values[1:], dtype='float32')
			# Store the vector; previously it was parsed and then discarded.
			embeddings_index[word] = coefs
	with tf.name_scope("train_op"):
	trainables = tf.trainable_variables()

	print "trainables", trainables
	for train_obj in trainables:
	print train_obj.name, train_obj.get_shape()
	print "grad", tf.gradients(self.loss, [train_obj])

	grads = tf.gradients(self.loss, trainables)
	import tensorflow as tf
	import numpy as np


	def gaussian_noise_layer(input_layer, std):
		"""Return ``input_layer`` plus zero-mean Gaussian noise.

		The noise is a float32 tensor sampled with the runtime shape of
		``input_layer`` and a standard deviation of ``std``.
		"""
		return input_layer + tf.random_normal(
			shape=tf.shape(input_layer),
			mean=0.0,
			stddev=std,
			dtype=tf.float32,
		)


	# float32 input placeholder with shape [None, 8].
	inp = tf.placeholder(dtype=tf.float32, shape=[None, 8], name='input')