Text preprocessing for ELMo
# Characters stripped from every tweet. Apostrophes, commas and periods are
# not in this list, so they are left in the text.
punctuation = '!"#$%&()*+-/:;<=>?@[\\]^_`{|}~'

# Build the deletion table once instead of rebuilding a set for every
# character of every tweet.
_punctuation_table = str.maketrans('', '', punctuation)


def _clean_tweet_column(column):
    """Return a cleaned copy of a text column.

    Steps, in order: delete the characters listed in ``punctuation``,
    lowercase, replace every digit with a space, then collapse runs of
    whitespace into single spaces (which also trims both ends).

    Args:
        column: pandas Series of strings (here, a ``clean_tweet`` column).

    Returns:
        A new Series with the cleaned text; the input is not modified.
    """
    # remove punctuation marks
    column = column.apply(lambda x: x.translate(_punctuation_table))
    # convert text to lowercase
    column = column.str.lower()
    # remove numbers; regex=True is required because "[0-9]" is a pattern,
    # and recent pandas versions treat the argument as literal text by default
    column = column.str.replace(r"[0-9]", " ", regex=True)
    # remove extra whitespace (digit removal above leaves stray spaces behind)
    return column.apply(lambda x: ' '.join(x.split()))


# Apply the identical pipeline to both splits so they stay consistent.
train['clean_tweet'] = _clean_tweet_column(train['clean_tweet'])
test['clean_tweet'] = _clean_tweet_column(test['clean_tweet'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment