Last active
January 23, 2020 05:06
-
-
Save c-l-nguyen/4c31e6ed367290b9d2c4682f7017e985 to your computer and use it in GitHub Desktop.
stopwords used for wordcloud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define the stopwords to remove before building the word cloud.
# Some of these words were not useful in describing an area; others were
# so obvious that they appeared too often to be informative.
# NOTE(review): STOPWORDS is not defined in this snippet -- presumably it is
# wordcloud.STOPWORDS imported earlier in the file; confirm.
stopwords = set(STOPWORDS)

# Single-token stopwords.
# NOTE(review): "'s", "ing" and "ed" look like fragments left behind by
# tokenization/stemming rather than real words -- confirm against the
# preprocessing step before removing them.
unigram_stopwords = [
    "clean", "nice", "perfect", "austin", "great", "place", "stay",
    "definitely", "would", "host", "house", "location", "home", "beautiful",
    "highly", "recommend", "comfortable", "space", "us", "everything", "'s",
    "bed", "amazing", "room", "apartment", "ing", "really", "loved",
    "wonderful", "good", "needed", "time", "thank", "need", "ed",
]

# Two-token stopwords; each entry is the two words joined by an underscore.
bigram_stopwords = [
    "would_recommend", "definitely_recommend", "highly_recommend",
    "great_place", "great_stay", "great_host", "would_definitely",
    "definitely_stay", "place_stay", "great_location", "would_stay",
    "place_great", "location_great", "highly_recommended",
    "clean_comfortable", "within_walking", "walking_distance", "stay_austin",
    "house_clean", "place_clean", "recommend_staying", "everything_needed",
    "would_highly", "recommend_place", "made_us", "us_feel", "nice_place",
]

# Merge both custom lists into the base stopword set in place.
stopwords.update(unigram_stopwords)
stopwords.update(bigram_stopwords)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment