Skip to content

Instantly share code, notes, and snippets.

@Joshuaek
Created October 31, 2017 20:40
Show Gist options
  • Save Joshuaek/87eaec01526b86a29a20a88fa11ddd67 to your computer and use it in GitHub Desktop.
Save Joshuaek/87eaec01526b86a29a20a88fa11ddd67 to your computer and use it in GitHub Desktop.
import re
from operator import itemgetter

import nltk
from nltk.corpus import stopwords
from nltk.stem.lancaster import LancasterStemmer
from sklearn.neural_network import MLPRegressor
def prepareSentence(s):
    """Turn a raw sentence into a list of stemmed, non-stop-word tokens.

    Processing steps, in order:

    1. Remove every character that is not an ASCII letter or a space.
    2. Lowercase the text and split it with ``nltk.word_tokenize``.
    3. Discard tokens found in NLTK's English stop-word list.
    4. Reduce each remaining token with the Lancaster stemmer.

    Args:
        s: The sentence to process (a ``str``).

    Returns:
        A list of stemmed tokens, in their original order. The list is
        empty when nothing survives the filtering (e.g. for ``""``).

    Note:
        NLTK's stop-word corpus and tokenizer models must be installed
        (see ``nltk.download``); NLTK raises ``LookupError`` otherwise.
    """
    # Strip digits, punctuation and any non-ASCII characters up front so
    # the tokenizer only ever sees letters and spaces.
    letters_only = re.sub(r'[^A-Za-z ]+', '', s)

    # Lowercase before tokenizing: the stop-word list is lowercase, so the
    # membership test below would otherwise miss capitalised words.
    tokens = nltk.word_tokenize(letters_only.lower())

    ignore_words = set(stopwords.words('english'))
    stemmer = LancasterStemmer()

    # Tokens are already lowercase here, so no further case folding is
    # needed before stemming.
    return [stemmer.stem(token) for token in tokens if token not in ignore_words]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment