AyishaR/sarcasm preprocessing.py

## sarcasm preprocessing.py
# Clean and stem every headline in `df`, collecting the raw words per label.
#
# Results left at module level:
#   all_sentences / X - one space-joined string of stemmed tokens per headline
#   wordcloud[label]  - the unstemmed tokens of every headline with that label
#   y                 - the `is_sarcastic` column of `df`
sno = nltk.stem.SnowballStemmer('english')    # Initializing stemmer
wordcloud = [[], []]    # one bucket per is_sarcastic value; assumes labels are 0/1 — TODO confirm
all_sentences = []    # All cleaned sentences

# Pull both columns out once instead of re-indexing the dataframe on every iteration.
headlines = df['headline'].values
labels = df['is_sarcastic'].values

for headline, sarcasm in zip(headlines, labels):
    sentence = removeURL(headline)
    sentence = removeHTML(sentence)
    sentence = onlyAlphabets(sentence)
    sentence = sentence.lower()

    # NOTE: stop-word filtering (`if word not in stop`) is disabled, so every token is kept.
    words = sentence.split()
    cleaned_sentence = [sno.stem(word) for word in words]

    # The word cloud buckets receive the original (unstemmed) tokens.
    wordcloud[sarcasm].extend(words)

    all_sentences.append(' '.join(cleaned_sentence))

# Expose the cleaned sentences as X and the label column as y
# (nothing is written back into the dataframe here).
X = all_sentences
y = df['is_sarcastic']
	# Clean and stem every headline in `df`, collecting the raw words per label.
	# NOTE(review): this repeats the preprocessing pass that appears earlier in
	# the file; confirm whether both copies are needed.
	sno = nltk.stem.SnowballStemmer('english') # Initializing stemmer
	wordcloud = [[], []] # one bucket per is_sarcastic value; assumes labels are 0/1 — TODO confirm
	all_sentences = [] # All cleaned sentences

	# Pull both columns out once instead of re-indexing the dataframe on every iteration.
	headlines = df['headline'].values
	labels = df['is_sarcastic'].values

	for headline, sarcasm in zip(headlines, labels):
		sentence = removeURL(headline)
		sentence = removeHTML(sentence)
		sentence = onlyAlphabets(sentence)
		sentence = sentence.lower()

		# NOTE: stop-word filtering (`if word not in stop`) is disabled, so every token is kept.
		words = sentence.split()
		cleaned_sentence = [sno.stem(word) for word in words]

		# The word cloud buckets receive the original (unstemmed) tokens.
		wordcloud[sarcasm].extend(words)

		all_sentences.append(' '.join(cleaned_sentence))

	# Expose the cleaned sentences as X and the label column as y
	# (nothing is written back into the dataframe here).
	X = all_sentences
	y = df['is_sarcastic']