-
-
Save codecademydev/396908adf01e0ce2c06ee0f86cf45ca9 to your computer and use it in GitHub Desktop.
Codecademy export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# These are the emails you will be censoring. Each text file is opened, its
# full contents are read into the matching variable, and the handle is closed
# again as soon as its `with` block exits (the bare open(...).read() form
# left all four files open until garbage collection).
with open("email_one.txt", "r") as email_file:
    email_one = email_file.read()
with open("email_two.txt", "r") as email_file:
    email_two = email_file.read()
with open("email_three.txt", "r") as email_file:
    email_three = email_file.read()
with open("email_four.txt", "r") as email_file:
    email_four = email_file.read()

# Terms that must never appear in an outgoing email.
proprietary_terms = [
    "she",
    "personality matrix",
    "sense of self",
    "self-preservation",
    "learning algorithm",
    "her",
    "herself",
]

# Words with a negative tone. The original list contained "distressed" twice;
# the duplicate is dropped because it has no effect on any membership test.
negative_words = [
    "concerned",
    "behind",
    "danger",
    "dangerous",
    "alarming",
    "alarmed",
    "out of control",
    "help",
    "unhappy",
    "bad",
    "upset",
    "awful",
    "broken",
    "damage",
    "damaging",
    "dismal",
    "distressed",
    "concerning",
    "horrible",
    "horribly",
    "questionable",
]
def cout1(content):
    """Print *content* with every "learning algorithms" replaced by "...".

    The match ignores letter case, so a capitalised occurrence at the start
    of a sentence ("Learning algorithms ...") is censored as well.

    :param content: text of the email to censor
    :return: None -- the censored text is written to standard output
    """
    import re

    content = re.sub("learning algorithms", "...", content, flags=re.IGNORECASE)
    print(content)


# Example: cout1(email_one)
def cout2(content, blacklist):
    """Return *content* with every blacklisted term replaced by "...".

    Matching rules:

    * case-insensitive, so "She" is caught by the term "she";
    * whole words only, so "her" no longer mangles "there" into "t...e";
    * a trailing plural "s"/"es" is absorbed, so the term
      "learning algorithm" still censors "learning algorithms";
    * longer terms are tried first, so "herself" is censored as one unit
      instead of being left as "...self" after "her" was replaced.

    :param content: text of the email to censor
    :param blacklist: iterable of words/phrases to remove
    :return: the censored text
    """
    import re

    # Empty strings are skipped: they would match at every position.
    terms = sorted({term for term in blacklist if term}, key=len, reverse=True)
    if not terms:
        return content

    alternatives = "|".join(re.escape(term) for term in terms)
    pattern = re.compile(
        r"(?<!\w)(?:" + alternatives + r")(?:e?s)?(?!\w)",
        re.IGNORECASE,
    )
    return pattern.sub("...", content)


# Example: print(cout2(email_two, proprietary_terms))
def cleaner(content, blacklist):
    """Flag blacklisted words, censoring them from the third hit onwards.

    The text is processed one whitespace-separated token at a time. Every
    token that is exactly equal to a blacklist entry is wrapped in "^^"
    markers. The first two hits keep their letters; from the third hit on,
    the letters are replaced by the same number of dots.

    Only the token being visited is rewritten. The previous implementation
    used a global ``str.replace``, which re-wrapped every occurrence on each
    hit ("^^^^bad^^^^"), censored the first two occurrences retroactively and
    also altered substrings of unrelated words ("badly").

    Matching is exact, as before: a token with attached punctuation ("bad.")
    or different letter case is not treated as a hit, and multi-word
    blacklist entries can never match a single token.

    :param content: text of the email to censor
    :param blacklist: collection of words to flag
    :return: the text with hits marked/censored and all whitespace preserved
    """
    import re

    banned = set(blacklist)
    hits = 0
    # The capturing group keeps the whitespace runs in the result: tokens sit
    # at even indices, separators at odd indices.
    pieces = re.split(r"(\s+)", content)
    for pos in range(0, len(pieces), 2):
        token = pieces[pos]
        if not token or token not in banned:
            continue
        hits += 1
        shown = token if hits <= 2 else "." * len(token)
        pieces[pos] = "^^" + shown + "^^"
    return "".join(pieces)


# Example: print(cleaner(email_three, negative_words))
def bleach(original_msg, blist1, blist2):
    """Censor banned words and the words directly next to them.

    Pass 1 decides which words are banned: a word is banned when, after
    removing leading/trailing punctuation and lower-casing it, it equals any
    single word of any phrase in the two lists. Multi-word phrases are
    therefore matched word by word ("sense of self" bans "sense", "of" and
    "self" individually), as in the original implementation.

    Pass 2 rewrites the message: each banned word becomes a run of "+" and
    each non-banned word directly before or after a banned word becomes a run
    of "-", both of the same length as the word they replace (punctuation
    included).

    Differences from the previous implementation, all of which were defects:

    * banned words in the first or last position now strike their neighbour;
    * a word following two adjacent banned words is no longer skipped;
    * strikes are tracked with a mask instead of searching for "--", so a
      literal "--" in the text no longer triggers censoring;
    * all surrounding punctuation is stripped at once, so "help?!" matches;
    * line breaks and spacing of the original message are preserved.

    :param original_msg: email to sanitize
    :param blist1: proprietary terms
    :param blist2: negative words
    :return: reconstructed email
    """
    import re
    import string

    banned_words = {
        part.lower()
        for phrase in list(blist1) + list(blist2)
        for part in phrase.split()
    }

    # The capturing group keeps whitespace runs: words sit at even indices of
    # `pieces`, separators at odd indices, so "".join restores the layout.
    pieces = re.split(r"(\s+)", original_msg)
    words = pieces[::2]

    # Pass 1: mark banned words without modifying anything yet.
    is_banned = [
        word.strip(string.punctuation).lower() in banned_words
        for word in words
    ]

    # Pass 2: rewrite banned words and their immediate neighbours.
    last = len(words) - 1
    for idx, word in enumerate(words):
        if is_banned[idx]:
            pieces[2 * idx] = "+" * len(word)
        elif (idx > 0 and is_banned[idx - 1]) or (idx < last and is_banned[idx + 1]):
            pieces[2 * idx] = "-" * len(word)
    return "".join(pieces)
# Run the two-pass censor over the fourth email and show the result.
censored_email_four = bleach(email_four, proprietary_terms, negative_words)
print(censored_email_four)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment