Skip to content

Instantly share code, notes, and snippets.

Last active June 10, 2020 09:21
Show Gist options
  • Save MrN00b0t/7f6838655d0963c1c15f168f80dc1bf8 to your computer and use it in GitHub Desktop.
Save MrN00b0t/7f6838655d0963c1c15f168f80dc1bf8 to your computer and use it in GitHub Desktop.
Codecademy: Censor Dispenser
# These are the emails you will be censoring. Each text file is read inside a
# `with` block so that its handle is closed as soon as the contents have been
# saved to the matching variable.
def _read_text(path):
    """Return the full contents of the text file at *path*."""
    with open(path, "r") as handle:
        return handle.read()


email_one = _read_text("email_one.txt")
email_two = _read_text("email_two.txt")
email_three = _read_text("email_three.txt")
email_four = _read_text("email_four.txt")
# Terms supplied by challenge 2.
proprietary_terms = [
    "she",
    "personality matrix",
    "sense of self",
    "self-preservation",
    "learning algorithm",
    "her",
    "herself",
]

# Terms supplied by challenge 3, with 'distressing' appended by the author.
# "distressed" appears twice, exactly as in the supplied list.
negative_words = [
    "concerned",
    "behind",
    "danger",
    "dangerous",
    "alarming",
    "alarmed",
    "out of control",
    "help",
    "unhappy",
    "bad",
    "upset",
    "awful",
    "broken",
    "damage",
    "damaging",
    "dismal",
    "distressed",
    "distressed",
    "concerning",
    "horrible",
    "horribly",
    "questionable",
    "distressing",
]

# Combined list used by the final challenge: proprietary terms first, then
# the negative words.
biglist = [*proprietary_terms, *negative_words]

# Characters that commonly follow a word (punctuation marks plus a space).
end = list(".?!);:, ")
#this simple function takes in a phrase and replaces it with censored
def censor(phrase, text):
    """Return *text* with every occurrence of *phrase* replaced by X's.

    Both the phrase exactly as given and the phrase with its first character
    upper-cased (as it would appear at the start of a sentence) are replaced.
    Each replacement is ``len(phrase)`` X's long. Matching is plain substring
    matching, so the phrase is also replaced when it occurs inside a longer
    word.

    An empty *phrase* leaves *text* unchanged.
    """
    # An empty phrase has no first character to capitalise and nothing to mask.
    if not phrase:
        return text
    mask = 'X' * len(phrase)
    capitalised = phrase[0].upper() + phrase[1:]
    return text.replace(phrase, mask).replace(capitalised, mask)
#print(censor('learning algorithms', email_one))
#take a list of words/phrases and censor them from document
def censorlist(phraselist, text):
    """Return *text* with every phrase in *phraselist* replaced by X's.

    A phrase is censored when it appears as written, in Title Case, or with
    only its first character upper-cased. It must stand on its own: the
    characters immediately before and after it may not be letters, digits or
    underscores, so 'her' is censored in "her," but not inside "herself" or
    "other". Each match is replaced by exactly as many X's as it has
    characters (spaces inside a multi-word phrase included), so the length of
    the text and any surrounding punctuation are preserved.

    Phrases are processed in list order; empty phrases are skipped.
    """
    import re  # imported locally because the module has no import section

    for phrase in phraselist:
        if not phrase:
            continue
        # The three casings the phrase may appear in; a set drops duplicates
        # (e.g. a single word's Title Case equals its capitalised form).
        variants = {phrase, phrase.title(), phrase[0].upper() + phrase[1:]}
        alternation = '|'.join(re.escape(variant) for variant in sorted(variants))
        # Lookarounds reject matches embedded in a longer word without
        # consuming the neighbouring character.
        pattern = re.compile(r'(?<!\w)(?:' + alternation + r')(?!\w)')
        text = pattern.sub(lambda found: 'X' * len(found.group()), text)
    return text
#print(censorlist(proprietary_terms, email_two))
#take a list of negative words and censor after ANY TWO occurrences
#ALSO censor everything from a phraselist
def positive(negwords, phraselist, text):
    """Censor negative words after two occurrences, plus all listed phrases.

    The first two occurrences of any entry of *negwords* (counted across all
    entries, in reading order) are left untouched. From the third occurrence
    onwards the remainder of the text is passed through
    ``censorlist(negwords, ...)``. Finally the whole text is passed through
    ``censorlist(phraselist, ...)``.

    An occurrence is counted when a negative word appears as written, in
    Title Case, or with its first character upper-cased, and is not embedded
    in a longer word. If fewer than three occurrences exist, only the
    *phraselist* pass changes the text.
    """
    import re  # imported locally because the module has no import section

    # Every casing in which a negative word should be recognised.
    variants = set()
    for word in negwords:
        if word:  # an empty entry has no text to look for
            variants.update((word, word.title(), word[0].upper() + word[1:]))

    if variants:
        # Longest alternatives first so a multi-word phrase such as
        # 'out of control' is preferred over a shorter entry that starts at
        # the same position.
        ordered = sorted(variants, key=lambda variant: (-len(variant), variant))
        alternation = '|'.join(re.escape(variant) for variant in ordered)
        occurrences = list(
            re.finditer(r'(?<!\w)(?:' + alternation + r')(?!\w)', text)
        )
        if len(occurrences) > 2:
            # Everything before the third occurrence keeps its negative
            # words; everything from it onwards is censored.
            cut = occurrences[2].start()
            text = text[:cut] + censorlist(negwords, text[cut:])

    return censorlist(phraselist, text)
#print(positive(negative_words, proprietary_terms, email_three))
#Final challenge: handle punctuation, case and preserve length
#Censor ALL negative words and ALL instances of defined phrases
#AND censor all words before and after a negword/defined phrase
#For this challenge, negative words and defined phrases are combined into biglist above
def bigcensor(phraselist, text):
    """Censor every listed phrase together with the word on either side.

    A phrase from *phraselist* is recognised as written, in Title Case, or
    with its first character upper-cased, provided it is not embedded in a
    longer word. Each match is replaced by the same number of X's. The
    whitespace-delimited word immediately before and immediately after the
    match also has its letters and digits replaced by X's, leaving attached
    punctuation in place. A match at the very start or end of the text simply
    has no neighbour on that side.

    The result always has the same length as *text*, and all whitespace
    (including newlines) is preserved. Empty phrases are ignored.
    """
    import re  # imported locally because the module has no import section
    from bisect import bisect_left, bisect_right

    # Every casing in which a phrase should be recognised.
    variants = set()
    for phrase in phraselist:
        if phrase:
            variants.update((phrase, phrase.title(), phrase[0].upper() + phrase[1:]))
    if not variants:
        return text

    # Longest alternatives first so multi-word phrases win over shorter
    # entries that start at the same position.
    ordered = sorted(variants, key=lambda variant: (-len(variant), variant))
    alternation = '|'.join(re.escape(variant) for variant in ordered)
    pattern = re.compile(r'(?<!\w)(?:' + alternation + r')(?!\w)')

    # Spans of every whitespace-delimited word, used to find the neighbours
    # of a match.
    word_spans = [word.span() for word in re.finditer(r'\S+', text)]
    word_starts = [start for start, _ in word_spans]

    # Matches are located in the untouched text; edits go to this copy so an
    # already-masked neighbour cannot hide a later match.
    chars = list(text)
    for hit in pattern.finditer(text):
        first = bisect_right(word_starts, hit.start()) - 1  # word holding the match start
        last = bisect_left(word_starts, hit.end()) - 1  # word holding the match end
        for neighbour in (first - 1, last + 1):
            if 0 <= neighbour < len(word_spans):
                begin, stop = word_spans[neighbour]
                for position in range(begin, stop):
                    if chars[position].isalnum():
                        chars[position] = 'X'
        chars[hit.start():hit.end()] = 'X' * (hit.end() - hit.start())
    return ''.join(chars)
#print(bigcensor(biglist, email_four))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment