# AyishaR/spam_ham_preprocess_functions.py

## spam_ham_preprocess_functions.py
# Remove html tags
def removeHTML(sentence):
    """Replace every HTML-like tag in ``sentence`` with a single space.

    A tag is the shortest run that starts at ``<`` and ends at the next
    ``>``; because ``.`` does not match a newline here, a tag must open
    and close on the same line to be matched.

    Args:
        sentence: Text that may contain markup.

    Returns:
        The text with each matched tag replaced by one space.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub(' ', sentence)

# Remove URLs
def removeURL(sentence):
    """Replace every http/https URL in ``sentence`` with a single space.

    A URL is ``http://`` or ``https://`` followed by one or more
    non-whitespace characters; the match stops at the first whitespace.

    Args:
        sentence: Text that may contain URLs.

    Returns:
        The text with each matched URL replaced by one space.
    """
    # Raw string: in a plain literal '\S' is an invalid escape sequence,
    # which newer Python versions warn about at compile time.
    url_pattern = re.compile(r'https?://\S+')
    return url_pattern.sub(' ', sentence)

# remove numbers, punctuation and any special characters (keep only alphabets)
def onlyAlphabets(sentence):
    """Replace every character that is not an ASCII letter with a space.

    Digits, punctuation, whitespace and non-ASCII characters are each
    replaced one-for-one, so the result has the same length as the input.

    Args:
        sentence: Text to clean.

    Returns:
        The text containing only ``a-z``, ``A-Z`` and spaces.
    """
    return re.sub('[^a-zA-Z]', ' ', sentence)

def removeRecurring(sentence):
    """Collapse any character repeated three or more times in a row.

    A run of three or more identical characters is reduced to a single
    occurrence; runs of one or two are left untouched. Newlines are not
    matched by ``.`` here, so runs of newlines are kept as they are.

    Args:
        sentence: Text that may contain elongated words.

    Returns:
        The text with long character runs shortened to one character.
    """
    long_run = re.compile(r'(.)\1{2,}')
    return long_run.sub(r'\g<1>', sentence)

# English stop words taken from NLTK's stopwords corpus, loaded once when
# this module is executed.
# NOTE(review): presumably the corpus must already be available locally
# (e.g. via nltk.download('stopwords')) — confirm against the caller/setup.
stop = nltk.corpus.stopwords.words('english')
	# Remove html tags
	def removeHTML(sentence):
		"""Replace every HTML-like tag in ``sentence`` with a single space.

		A tag is the shortest run that starts at ``<`` and ends at the next
		``>``; because ``.`` does not match a newline here, a tag must open
		and close on the same line to be matched.

		Args:
			sentence: Text that may contain markup.

		Returns:
			The text with each matched tag replaced by one space.
		"""
		# NOTE(review): this repeats the removeHTML defined earlier in this
		# file; the body is now indented under the def so it parses.
		regex = re.compile('<.*?>')
		return re.sub(regex, ' ', sentence)

	# Remove URLs
	def removeURL(sentence):
		"""Replace every http/https URL in ``sentence`` with a single space.

		A URL is ``http://`` or ``https://`` followed by one or more
		non-whitespace characters; the match stops at the first whitespace.

		Args:
			sentence: Text that may contain URLs.

		Returns:
			The text with each matched URL replaced by one space.
		"""
		# NOTE(review): this repeats the removeURL defined earlier in this
		# file; the body is now indented under the def so it parses.
		# Raw string: in a plain literal '\S' is an invalid escape sequence.
		regex = re.compile(r'http[s]?://\S+')
		return re.sub(regex, ' ', sentence)

	# remove numbers, punctuation and any special characters (keep only alphabets)
	def onlyAlphabets(sentence):
		"""Replace every character that is not an ASCII letter with a space.

		Digits, punctuation, whitespace and non-ASCII characters are each
		replaced one-for-one, so the result has the same length as the input.

		Args:
			sentence: Text to clean.

		Returns:
			The text containing only ``a-z``, ``A-Z`` and spaces.
		"""
		# NOTE(review): this repeats the onlyAlphabets defined earlier in
		# this file; the body is now indented under the def so it parses.
		regex = re.compile('[^a-zA-Z]')
		return re.sub(regex, ' ', sentence)

	def removeRecurring(sentence):
		"""Collapse any character repeated three or more times in a row.

		A run of three or more identical characters is reduced to a single
		occurrence; runs of one or two are left untouched. Newlines are not
		matched by ``.`` here, so runs of newlines are kept as they are.

		Args:
			sentence: Text that may contain elongated words.

		Returns:
			The text with long character runs shortened to one character.
		"""
		# NOTE(review): this repeats the removeRecurring defined earlier in
		# this file; the body is now indented under the def so it parses.
		return re.sub(r'(.)\1{2,}', r'\1', sentence)

	# English stop words taken from NLTK's stopwords corpus.
	# NOTE(review): this repeats the `stop` assignment made earlier in this
	# file; presumably the corpus must already be available locally
	# (e.g. via nltk.download('stopwords')) — confirm.
	stop = nltk.corpus.stopwords.words('english')