Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created April 9, 2020 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/4f217aad83bd7a23cf0b0c90a2b08a3e to your computer and use it in GitHub Desktop.
Save codecademydev/4f217aad83bd7a23cf0b0c90a2b08a3e to your computer and use it in GitHub Desktop.
Codecademy export
def censor_text(text, phrases, limit=0, near=False, letters="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'"):
    """Return a copy of `text` with matching phrases replaced by X's.

    The text is split into words, where a word is a maximal run of
    characters found in `letters`. Matching is case-insensitive and works
    on whole words only. Every censored word is replaced by as many "X"
    characters as it has letters; characters between words (spaces,
    punctuation, hyphens) are left untouched.

    Args:
        text: The string to censor.
        phrases: Iterable of phrases to censor. Each phrase is split into
            words with the same rules as `text`, so "self-preservation"
            matches the two consecutive words "self" and "preservation".
            When several phrases match at one position, the first one in
            iteration order wins.
        limit: Number of leading matches to leave uncensored. With the
            default of 0 every match is censored.
        near: When true, the words directly before and after each censored
            word are censored as well.
        letters: Characters that are considered part of a word.

    Returns:
        The censored string, with the same length as `text`.
    """
    tokens = _split_words(text, letters)
    words = [word for _, _, word in tokens]

    # Phrases go through the same tokeniser (and lowercasing) as the text;
    # otherwise hyphenated or capitalised phrases could never match.
    # Phrases that contain no word characters are dropped, because an empty
    # word list would "match" at every position and hide all later phrases.
    phrase_words = []
    for phrase in phrases:
        parts = [word for _, _, word in _split_words(phrase, letters)]
        if parts:
            phrase_words.append(parts)

    censored = set()
    matches_seen = 0
    for position in range(len(words)):
        for parts in phrase_words:
            stop = position + len(parts)
            # A slice running past the end is shorter than `parts`, so the
            # comparison also covers the bounds check.
            if words[position:stop] == parts:
                if matches_seen >= limit:
                    censored.update(range(position, stop))
                matches_seen += 1
                break  # at most one phrase is counted per start position

    if near:
        last = len(words) - 1
        neighbours = set()
        for position in censored:
            if position > 0:
                neighbours.add(position - 1)
            if position < last:
                neighbours.add(position + 1)
        censored |= neighbours

    # Rebuild the text in one left-to-right pass, swapping censored spans.
    pieces = []
    cursor = 0
    for index in sorted(censored):
        start, end, _ = tokens[index]
        pieces.append(text[cursor:start])
        pieces.append("X" * (end - start))
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)


def _split_words(text, letters):
    """Return (start, end, lowercased word) for each maximal run of `letters` in `text`."""
    allowed = set(letters)
    tokens = []
    start = None
    for index, char in enumerate(text):
        if char in allowed:
            if start is None:
                start = index
        elif start is not None:
            tokens.append((start, index, text[start:index].lower()))
            start = None
    if start is not None:
        # The text ends in the middle of a word.
        tokens.append((start, len(text), text[start:].lower()))
    return tokens
def _read_text(path):
    """Return the whole contents of the text file at `path`, closing it afterwards."""
    with open(path, "r") as handle:
        return handle.read()


# Load the four emails to censor; the files are looked up in the current
# working directory.
email_one = _read_text("email_one.txt")
email_two = _read_text("email_two.txt")
email_three = _read_text("email_three.txt")
email_four = _read_text("email_four.txt")

# Phrase lists passed to censor_text below.
proprietary_terms = ["she", "personality matrix", "sense of self", "self-preservation", "learning algorithm", "learning algorithms", "her", "herself"]
negative_words = ["concerned", "behind", "danger", "dangerous", "alarming", "alarmed", "out of control", "help", "unhappy", "bad", "upset", "awful", "broken", "damage", "damaging", "dismal", "distressed", "distressing", "concerning", "horrible", "horribly", "questionable"]

# Email one: censor a single phrase.
email_one_censored = censor_text(email_one, ["learning algorithms"])
# Email two: censor every proprietary term.
email_two_censored = censor_text(email_two, proprietary_terms)
# Email three: censor proprietary terms first, then negative words, passing
# 2 as the `limit` argument of the outer call.
email_three_censored = censor_text(censor_text(email_three, proprietary_terms), negative_words, 2)
# Email four: censor both lists, with near=True.
email_four_censored = censor_text(email_four, negative_words + proprietary_terms, near=True)

# Uncomment to inspect the results:
# print(email_one_censored)
# print(email_two_censored)
# print(email_three_censored)
# print(email_four_censored)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment