Megha Agarwal megha444

## install.py
# One-time environment setup for the tutorial snippets.
#
# Install the GloVe bindings from a shell. `pip install` is a shell command,
# not a Python statement, so it must not appear as executable code here:
#     pip install glove_python
import nltk

# Download the NLTK resources used by the preprocessing steps: the stop-word
# list, the Punkt tokenizer models and the WordNet data for lemmatization.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

## input.py
# Toy input for the tutorial: three short English sentences.
lines = [
    "Hello this is a tutorial to convert word to integer",
    "It is a beautiful day",
    "Jack is going to office",
]

## tokenize.py
from nltk.tokenize import sent_tokenize, word_tokenize

# Split every sentence in `lines` into word tokens. The result holds one
# token list per sentence, in the same order as `lines`.
word_tokens = [word_tokenize(sentence) for sentence in lines]
print(word_tokens)

## stopword_removal.py
from nltk.corpus import stopwords

# NLTK's English stop-word list. It keeps its original name and type; the set
# copy makes each membership test below a constant-time lookup.
stop_words = stopwords.words('english')
stop_word_set = set(stop_words)

# One filtered token list per sentence. Tokens are lower-cased for the
# comparison because the NLTK list contains lower-case entries only, so a
# capitalised stop word such as "It" would otherwise be kept.
lines_without_stopwords = [
    [word for word in tokens if word.lower() not in stop_word_set]
    for tokens in word_tokens
]

# Flat list of every surviving token in reading order. Built in a single
# expression so it is defined even when there are no input sentences.
stop_removed = [word for tokens in lines_without_stopwords for word in tokens]
print(stop_removed)

## lemmatize.py
from nltk import WordNetLemmatizer
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()
lines_with_lemmas = []  # placeholder for per-sentence lemmas; not filled by this snippet

# Lemmatize every token that survived stop-word removal. A single expression
# keeps `lem_line` defined even when `stop_removed` is empty.
lem_line = [wordnet_lemmatizer.lemmatize(word) for word in stop_removed]

# All lemmas joined into one comma-separated string.
new_lines = ','.join(lem_line)

## build_model.py
# Import the GloVe library.
from glove import Corpus, Glove

# The Corpus object builds the word co-occurrence matrix used by GloVe.
corpus = Corpus()

# Corpus.fit expects an iterable of token lists. `new_lines` is one
# comma-joined string, and iterating a string yields single characters, so it
# is split back into tokens first. An already tokenized corpus is passed on
# unchanged.
if isinstance(new_lines, str):
    sentences = [[token for token in new_lines.split(',') if token]]
else:
    sentences = new_lines
corpus.fit(sentences, window=10)

# Create the model: `no_components` is the embedding size and `learning_rate`
# the step size of the gradient-descent training.
glove = Glove(no_components=5, learning_rate=0.05)

# Train the embeddings on the co-occurrence matrix. Without this call the
# model has no word vectors to look up.
glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)


## add_corpus.py
# Hand the corpus dictionary to the model so vectors can be looked up by word.
corpus_dictionary = corpus.dictionary
glove.add_dictionary(corpus_dictionary)

## evaluate.py
# Print the embedding of one word: the model dictionary gives the word's
# index, which selects the matching entry of `word_vectors`.
# The word must be in the model dictionary, otherwise the lookup raises
# KeyError.
print(glove.word_vectors[glove.dictionary['samsung']])

## load_data.py
# Read the complete reviews and labels files into memory as text.
# The string literals use plain ASCII quotes; typographic quotes are not
# valid Python string delimiters.
with open('data/reviews.txt', 'r') as f:
    reviews = f.read()
with open('data/labels.txt', 'r') as f:
    labels = f.read()

# Show the beginning of each file as a quick sanity check.
print(reviews[:50])
print()
print(labels[:26])

## load_data.py
# Complete walkthrough: load the raw text files, preprocess the sentences
# with NLTK and train GloVe embeddings on the result.
#
# Install the GloVe bindings from a shell before running this script:
#     pip install glove_python
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk import WordNetLemmatizer
from nltk.stem import WordNetLemmatizer
# Import the GloVe library.
from glove import Corpus, Glove

# Read the complete reviews and labels files into memory as text.
with open('data/reviews.txt', 'r') as f:
    reviews = f.read()
with open('data/labels.txt', 'r') as f:
    labels = f.read()

# Download the NLTK resources used below.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

# Tokenize: one list of word tokens per sentence.
# NOTE(review): `lines` is not defined anywhere in this listing; presumably
# it is the list of input sentences to embed. Confirm where it comes from.
word_tokens = [word_tokenize(sentence) for sentence in lines]
print(word_tokens)

# Remove stop words. Tokens are lower-cased for the comparison because the
# NLTK list contains lower-case entries only.
stop_words = stopwords.words('english')
stop_word_set = set(stop_words)
lines_without_stopwords = [
    [word for word in tokens if word.lower() not in stop_word_set]
    for tokens in word_tokens
]
# Flat view of all surviving tokens, in reading order.
stop_removed = [word for tokens in lines_without_stopwords for word in tokens]
print(stop_removed)

# Lemmatize, keeping one token list per sentence.
wordnet_lemmatizer = WordNetLemmatizer()
lines_with_lemmas = [
    [wordnet_lemmatizer.lemmatize(word) for word in tokens]
    for tokens in lines_without_stopwords
]
# Flat list of all lemmas and its comma-joined string form.
lem_line = [word for tokens in lines_with_lemmas for word in tokens]
new_lines = ','.join(lem_line)

# Build the co-occurrence matrix used by GloVe. Corpus.fit expects an
# iterable of token lists, so it is given the per-sentence lemma lists rather
# than the joined string (a string would be consumed character by character).
corpus = Corpus()
corpus.fit(lines_with_lemmas, window=10)

# Create the model: `no_components` is the embedding size and `learning_rate`
# the step size of the gradient-descent training.
glove = Glove(no_components=5, learning_rate=0.05)
# Train the embeddings, then attach the corpus dictionary so vectors can be
# looked up by word.
glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
glove.add_dictionary(corpus.dictionary)

# Show the beginning of each loaded file as a quick sanity check.
print(reviews[:50])
print()
print(labels[:26])