# djokester/QuoraAnswered4.py

## QuoraAnswered4.py
from nltk import word_tokenize
from nltk.corpus import stopwords
from gensim import models
from gensim.models.doc2vec import TaggedDocument
#Function for normalizing paragraphs.
def normalize(string):
    lst = word_tokenize(string)
    lst =[word.lower() for word in lst if word.isalpha()]
    lst = [w for w in lst if not w in stopwords.words('english')]
    return(lst)
# Aggregate questions under each topic tag as a paragraph.
# Normalize the paragraph
# Feed the normalized paragraph along with the topic tag into Gensim's Tagged Document function.
# Append the return value to docs.
docs = []
for index, item in enumerate(topic_list):
    question = " ".join(question_list[index])
    question = normalize(question)
    docs.append(TaggedDocument(words=question, tags=[item]))
	from nltk import word_tokenize
	from nltk.corpus import stopwords
	from gensim import models
	from gensim.models.doc2vec import TaggedDocument
	#Function for normalizing paragraphs.
	def normalize(string):
	lst = word_tokenize(string)
	lst =[word.lower() for word in lst if word.isalpha()]
	lst = [w for w in lst if not w in stopwords.words('english')]
	return(lst)
	# Aggregate questions under each topic tag as a paragraph.
	# Normalize the paragraph
	# Feed the normalized paragraph along with the topic tag into Gensim's Tagged Document function.
	# Append the return value to docs.
	docs = []
	for index, item in enumerate(topic_list):
	question = " ".join(question_list[index])
	question = normalize(question)
	docs.append(TaggedDocument(words=question, tags=[item]))