Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created April 2, 2020 02:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/396908adf01e0ce2c06ee0f86cf45ca9 to your computer and use it in GitHub Desktop.
Save codecademydev/396908adf01e0ce2c06ee0f86cf45ca9 to your computer and use it in GitHub Desktop.
Codecademy export
# These are the emails you will be censoring. Each text file is read in full
# and its contents are saved to the variables below. The read happens inside a
# `with` block so the file handle is closed as soon as the text is loaded.
def _read_email(path):
    """Return the entire text of the file at *path*."""
    with open(path, "r") as handle:
        return handle.read()


email_one = _read_email("email_one.txt")
email_two = _read_email("email_two.txt")
email_three = _read_email("email_three.txt")
email_four = _read_email("email_four.txt")

# Terms that must never appear in an outgoing email.
proprietary_terms = ["she", "personality matrix", "sense of self", "self-preservation", "learning algorithm", "her", "herself"]
# Words considered alarming; censored by the stricter filters below.
negative_words = ["concerned", "behind", "danger", "dangerous", "alarming", "alarmed", "out of control", "help", "unhappy", "bad", "upset", "awful", "broken", "damage", "damaging", "dismal", "distressed", "distressed", "concerning", "horrible", "horribly", "questionable"]
def cout1(content, phrase="learning algorithms", replacement="..."):
    """Censor every occurrence of *phrase* in *content*, then print it.

    :param content: text to censor
    :param phrase: exact (case-sensitive) text to hide; the default is the
        phrase this function originally hard-coded
    :param replacement: text substituted for each occurrence of *phrase*
    :return: the censored text (the same string that is printed)
    """
    censored = content.replace(phrase, replacement)
    print(censored)
    # Return the result as well as printing it so callers can reuse the
    # censored text instead of only seeing it on stdout.
    return censored
# cout1(email_one)
def cout2(content, blacklist):
    """Replace every blacklisted term in *content* with ``"..."``.

    Terms are matched case-insensitively and only as whole words/phrases, so
    a short term such as "her" does not censor part of "there" or "herself".

    :param content: text to censor
    :param blacklist: iterable of words or phrases to hide
    :return: the censored text
    """
    import re

    # Longest terms first: at any position the regex alternation takes the
    # first alternative that fits, so "herself" must be tried before "her".
    terms = sorted({term for term in blacklist if term},
                   key=lambda term: (-len(term), term))
    if not terms:
        return content

    # Lookarounds instead of \b so terms that start or end with a non-word
    # character (e.g. a hyphen) are still delimited correctly.
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
        re.IGNORECASE,
    )
    return pattern.sub("...", content)
# print(cout2(email_two, proprietary_terms))
def cleaner(content, blacklist):
    """Flag blacklisted terms and censor them after the second occurrence.

    Occurrences are counted across the whole blacklist, in reading order.
    Every occurrence is wrapped in ``^^`` markers; from the third occurrence
    onwards the term itself is replaced by dots of the same length.

    Matching is case-insensitive and whole-word, so trailing punctuation and
    multi-word phrases are handled, and each occurrence is rewritten exactly
    once (earlier occurrences are never altered retroactively).

    :param content: text to clean
    :param blacklist: iterable of words or phrases to flag
    :return: the cleaned text
    """
    import re

    # Longest terms first so a phrase wins over a shorter term it starts with.
    terms = sorted({term for term in blacklist if term},
                   key=lambda term: (-len(term), term))
    if not terms:
        return content

    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
        re.IGNORECASE,
    )

    occurrences = 0

    def _mark(match):
        nonlocal occurrences
        occurrences += 1
        found = match.group(0)
        # The first two hits stay readable; later hits are blanked out.
        shown = found if occurrences <= 2 else "." * len(found)
        return "^^" + shown + "^^"

    return pattern.sub(_mark, content)
# print(cleaner(email_three, negative_words))
def bleach(original_msg, blist1, blist2):
    """Strike out banned phrases and the words on either side of them.

    The message is split on whitespace. A token belongs to a banned phrase
    when a run of consecutive tokens, compared case-insensitively with
    leading/trailing ``. , ! ?`` removed, equals the phrase's words. Banned
    tokens are rendered as ``+`` characters and the tokens directly before
    and after them as ``-`` characters, each run as long as the token it
    replaces (punctuation included).

    Multi-word phrases only match as a whole, so a common word such as "of"
    is not struck merely because it appears inside "out of control".

    :param original_msg: email to sanitize
    :param blist1: proprietary terms
    :param blist2: negative words
    :return: reconstructed email, tokens joined by single spaces
    """
    punctuation = ".,!?"
    tokens = original_msg.split()
    normalized = [token.strip(punctuation).lower() for token in tokens]

    # Each banned phrase as a tuple of lowercase words; blank entries dropped.
    phrases = {tuple(phrase.lower().split())
               for phrase in list(blist1) + list(blist2)}
    phrases.discard(())

    # Pass 1: mark every token that is part of a banned phrase.
    banned = [False] * len(tokens)
    for phrase in phrases:
        width = len(phrase)
        for start in range(len(tokens) - width + 1):
            if tuple(normalized[start:start + width]) == phrase:
                banned[start:start + width] = [True] * width

    # Pass 2: rebuild the message from the mask. Using the mask (rather than
    # looking for dashes in the text) keeps first/last tokens and adjacent
    # banned tokens from being missed.
    last = len(tokens) - 1
    rebuilt = []
    for idx, token in enumerate(tokens):
        if banned[idx]:
            rebuilt.append("+" * len(token))
        elif (idx > 0 and banned[idx - 1]) or (idx < last and banned[idx + 1]):
            rebuilt.append("-" * len(token))
        else:
            rebuilt.append(token)
    return " ".join(rebuilt)
# Run the strictest filter on the fourth email and show the result.
censored_email_four = bleach(email_four, proprietary_terms, negative_words)
print(censored_email_four)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment