Last active
March 8, 2019 20:44
-
-
Save aaronkub/257a1bd9215da3a7221148600d849450 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Punctuation that is deleted outright (replaced by the empty string).
# Raw string: "\[" and "\]" are regex escapes, not Python string escapes.
REPLACE_NO_SPACE = re.compile(r"""[.;:!'?,"()\[\]]""")

# Tokens that are turned into a single space: a doubled HTML line break
# ("<br /><br />", with optional whitespace before each slash), a hyphen,
# or a forward slash.
REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")
def preprocess_reviews(reviews):
    """Return lower-cased, punctuation-normalised copies of *reviews*.

    Each review is lower-cased, every match of ``REPLACE_NO_SPACE`` is
    deleted, and then every match of ``REPLACE_WITH_SPACE`` is replaced
    by a single space. Deletion runs first, exactly as in the two-pass
    original; the passes are merely fused into one comprehension.

    Args:
        reviews: Iterable of review strings.

    Returns:
        A new list holding one cleaned string per input review, in the
        same order as the input.
    """
    return [
        REPLACE_WITH_SPACE.sub(" ", REPLACE_NO_SPACE.sub("", line.lower()))
        for line in reviews
    ]
# Clean both splits with the same routine so they are normalised alike.
# NOTE(review): reviews_train / reviews_test are not defined in this
# snippet; presumably they are loaded earlier as collections of strings.
reviews_train_clean = preprocess_reviews(reviews_train)
reviews_test_clean = preprocess_reviews(reviews_test)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment