Instantly share code, notes, and snippets.

Embed
What would you like to do?
import re
REPLACE_NO_SPACE = re.compile("(\.)|(\;)|(\:)|(\!)|(\')|(\?)|(\,)|(\")|(\()|(\))|(\[)|(\])")
REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")
def preprocess_reviews(reviews):
reviews = [REPLACE_NO_SPACE.sub("", line.lower()) for line in reviews]
reviews = [REPLACE_WITH_SPACE.sub(" ", line) for line in reviews]
return reviews
reviews_train_clean = preprocess_reviews(reviews_train)
reviews_test_clean = preprocess_reviews(reviews_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment