Created
February 27, 2019 21:35
-
-
Save talfco/0476b9e68ca57bda60af227441cf6221 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import re | |
import unicodedata | |
def normalize_unicode_to_ascii(data):
    """Reduce a Unicode string to lowercase ASCII letters, digits and spaces.

    The text is NFKD-normalized so that accented characters decompose into a
    base character plus combining marks; encoding to ASCII with ``'ignore'``
    then drops every non-ASCII code point. The result is lowercased, each run
    of characters other than ASCII letters, digits or spaces is replaced by a
    single space, and runs of spaces are collapsed to one.

    Leading and trailing spaces are not stripped, so input that starts or ends
    with punctuation yields a result that starts or ends with one space.

    Args:
        data: The string to normalize.

    Returns:
        The normalized string.
    """
    ascii_bytes = unicodedata.normalize('NFKD', data).encode('ASCII', 'ignore')
    lowered = ascii_bytes.decode("utf-8").lower()
    # First turn every run of special characters into a space, then squeeze
    # the resulting (and any pre-existing) runs of spaces down to one.
    return re.sub(' +', ' ', re.sub('[^A-Za-z0-9 ]+', ' ', lowered))
def sort_words(words):
    """Return the space-separated words of a string in sorted order.

    The string is split on the space character, the words are sorted with
    Python's default string ordering, and the result is joined back with
    single spaces.

    Args:
        words: A string of words separated by spaces.

    Returns:
        The sorted words joined by single spaces; an empty string when the
        input contains no words.
    """
    # Splitting on " " produces empty strings for leading, trailing or
    # repeated spaces. Those would sort first and leave stray spaces at the
    # start of the result, so they are dropped before sorting.
    tokens = sorted(word for word in words.split(" ") if word)
    return " ".join(tokens)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment