Last active
June 10, 2020 09:21
-
-
Save MrN00b0t/7f6838655d0963c1c15f168f80dc1bf8 to your computer and use it in GitHub Desktop.
Codecademy: Censor Dispenser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# These are the emails you will be censoring. A small helper opens each text
# file, reads its contents, and closes the handle (open().read() alone would
# leak the file object).
def _read_email(path):
    """Return the full contents of the text file at *path*."""
    with open(path, "r") as handle:
        return handle.read()

email_one = _read_email("email_one.txt")
email_two = _read_email("email_two.txt")
email_three = _read_email("email_three.txt")
email_four = _read_email("email_four.txt")
# list of terms provided in challenge 2
proprietary_terms = ["she", "personality matrix", "sense of self", "self-preservation", "learning algorithm", "her", "herself"]
# list of terms provided in challenge three. I added 'distressing'
# NOTE(review): "distressed" appears twice — likely a typo; the duplicate makes
# positive() count each occurrence twice. Left as-is to preserve behaviour.
negative_words = ["concerned", "behind", "danger", "dangerous", "alarming", "alarmed", "out of control", "help", "unhappy", "bad", "upset", "awful", "broken", "damage", "damaging", "dismal", "distressed", "distressed", "concerning", "horrible", "horribly", "questionable", 'distressing']
# create a large list for the final challenge
biglist = proprietary_terms + negative_words
# create a list of common punctuation that appears after a word
end = ['.', '?', '!', ')', ';', ':', ',', ' ']
#this simple function takes in a phrase and replaces it with censored | |
def censor(phrase, text):
    """Replace every occurrence of *phrase* in *text* with a same-length
    run of 'X'.

    Also censors the sentence-initial form (first letter capitalised),
    e.g. 'she' -> 'She'. Returns the censored text.
    """
    # Guard: an empty phrase has nothing to censor and would otherwise
    # raise IndexError on phrase[0] below.
    if not phrase:
        return text
    mask = 'X' * len(phrase)
    text = text.replace(phrase, mask)
    # check also for instances where phrase starts a sentence (capitalised)
    capitalised = phrase[0].upper() + phrase[1:]
    text = text.replace(capitalised, mask)
    return text
#print(censor('learning algorithms', email_one)) | |
#take a list of words/phrases and censor them from document | |
def censorlist(phraselist, text, punctuation=('.', '?', '!', ')', ';', ':', ',', ' ')):
    """Censor every word/phrase in *phraselist* from *text*.

    Each match is replaced by a same-length run of 'X'; trailing punctuation
    is preserved. Matching requires the phrase to be followed by a space or
    punctuation so that e.g. 'her' inside 'herself' is left alone.

    :param phraselist: words/phrases to censor
    :param text: document to censor
    :param punctuation: characters accepted after a phrase (defaults to the
        module-level ``end`` list's contents, kept here so the function is
        self-contained)
    :return: the censored text
    """
    for phrase in phraselist:
        if not phrase:
            continue  # nothing to censor; also protects phrase[0] below
        mask = 'X' * len(phrase)
        capitalised = phrase[0].upper() + phrase[1:]
        # phrase followed by a space — avoids 'herself' becoming 'XXXself'
        text = text.replace(phrase + ' ', mask + ' ')
        text = text.replace((phrase + ' ').title(), mask + ' ')
        text = text.replace(capitalised + ' ', mask + ' ')
        # handle case where the whole text is exactly the searched phrase
        # (fixed: original compared only the lengths, not the contents)
        if text == phrase:
            text = mask
        # punctuated cases: censor the phrase but keep the punctuation mark
        # (fixed: original masked len(phrase + punc) characters AND appended
        # the punctuation again, growing the text by one char per match)
        for punc in punctuation:
            text = text.replace(phrase + punc, mask + punc)
            text = text.replace(capitalised + punc, mask + punc)
    return text
#print(email_two) | |
#print(censorlist(proprietary_terms, email_two)) | |
#take a list of negative words and censor after ANY TWO occurrences | |
#ALSO censor everything from a phraselist | |
def positive(negwords, phraselist, text):
    """Censor negative words after the first two occurrences, and censor
    every phrase in *phraselist* throughout.

    :param negwords: negative words — occurrences beyond the second are
        replaced with 'XXXXXX'
    :param phraselist: phrases censored everywhere via censorlist()
    :param text: document to censor
    :return: the rebuilt, censored document
    """
    # split the document into individual words
    split = text.split(' ')
    titlelist = []
    punclist = []
    # create expanded list which includes capitalised negative words
    for i in negwords:
        titlelist.append(i)
        titlelist.append(i.title())
    # expand list further to create punctuated words to search
    for i in titlelist:
        punclist.append(i)
        # also create cases where negword begins a new paragraph
        for j in end:
            punclist.append(i + j)
            punclist.append('\n\n' + i + j)
    count = 0
    # check each word in split to see if it is a negword
    for i in range(len(split)):
        for j in punclist:
            if split[i] == j:
                count += 1
                # leave the first two detected negwords uncensored
                if count < 3:
                    continue
                else:
                    split[i] = 'XXXXXX'
    # The following catches multi-word negwords ('out of control'), but only
    # in the section AFTER the third single-word negword was found.
    # Handle cases where fewer than 3 negwords have been found.
    try:
        splitter = split.index('XXXXXX')
    except ValueError:
        # no word was censored above — clean the whole document
        # (fixed: was a bare except, which would hide unrelated errors)
        splitter = 0
    toclean = split[splitter:]
    partform = ' '.join(split[:splitter])
    toclean = ' '.join(toclean)
    # use censorlist() to catch any phrases in negwords in that section
    toclean = censorlist(negwords, toclean)
    # rebuild the document and censor all proprietary phrases everywhere
    reform = partform + ' ' + toclean
    reform = censorlist(phraselist, reform)
    return reform
#print(positive(negative_words, proprietary_terms, email_three)) | |
#Final challenge, handle punctuatio, case and preserve length | |
#Censor ALL negative words and ALL instances of defined phrases | |
#AND censor all words before and after a negword/defined phrase | |
#For this challenge, negative words and defined phrases are combined into biglist above | |
def bigcensor(phraselist, text):
    """Censor ALL phrases in *phraselist* (any casing, with punctuation,
    preserving length) AND the word immediately before and after each match.

    :param phraselist: words/phrases to censor (e.g. the combined biglist)
    :param text: document to censor
    :return: the censored document
    """
    split = text.split(' ')
    titlelist = []
    punclist = []
    filtered = []
    # as before, create an expanded list to include punctuation and casing
    for i in phraselist:
        titlelist.append(i)
        titlelist.append(i.title())
        titlelist.append(i.upper())
    for j in titlelist:
        punclist.append(j)
        for k in end:
            punclist.append(j + k)
            punclist.append('\n\n' + j + k)
    # build the censored document as we work through the split
    for l in range(len(split)):
        filtered.append(split[l])
        for m in punclist:
            # extract and censor the matched word plus its neighbours
            if split[l] == m:
                current = filtered.pop()
                # guard: a match on the very first word has no predecessor
                # (fixed: unconditional pop raised IndexError here)
                if filtered:
                    before = filtered.pop()
                    filtered.append('X' * len(before))
                filtered.append('X' * len(current))
                # guard: a match on the very last word has no successor
                # (fixed: unconditional split[l + 1] raised IndexError here)
                if l + 1 < len(split):
                    split[l + 1] = 'X' * len(split[l + 1])
    # reassemble the document
    reform = ' '.join(filtered)
    reform = censorlist(phraselist, reform)
    # do a final pass to deal with matches adjacent to '\n'
    # This does not filter the word before/after, unfortunately
    for q in punclist:
        reform = censor(q, reform)
    return reform
#print(email_four) | |
#print(bigcensor(biglist, email_four)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment