AyishaR/question_cleaning_text.py

## question_cleaning_text.py
# Clean every question in the dataframe and collect two artefacts:
#   * all_sentences / X - one cleaned, stemmed string per dataframe row
#   * wordcloud         - the raw (un-stemmed) words of each question, grouped
#                         by the position of its category in class_names
# The cleaning helpers (removeURL, removeHTML, onlyAlphabets), df and
# class_names are defined elsewhere in the project.
sno = nltk.stem.SnowballStemmer('english')    # Stemmer reused for every word
wordcloud = [[], [], [], [], [], [], []]    # Seven buckets, indexed via class_names.index(...)
all_sentences = []    # All cleaned sentences, in dataframe row order


# Walk the question and category columns in lockstep, one row per iteration.
for question, classname in zip(df['Questions'].values, df['Category0'].values):
    # Pass the raw text through the cleaning helpers, then lower-case it.
    sentence = removeURL(question)
    sentence = removeHTML(sentence)
    sentence = onlyAlphabets(sentence)
    sentence = sentence.lower()

    words = sentence.split()

    # NOTE: stop-word filtering is currently disabled; every token is stemmed
    # and kept.
    cleaned_sentence = [sno.stem(word) for word in words]

    if words:
        # Look up the category bucket once per question instead of once per
        # word. The guard keeps the lookup from running for questions that
        # clean down to nothing, which is when the per-word version never
        # reached it either.
        wordcloud[class_names.index(classname)].extend(words)

    all_sentences.append(' '.join(cleaned_sentence))

# X is the list of cleaned sentences used by later steps (it is a plain list
# here; nothing is written back into the dataframe at this point).
X = all_sentences