-
-
Save codecademydev/4f217aad83bd7a23cf0b0c90a2b08a3e to your computer and use it in GitHub Desktop.
Codecademy export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def censor_text(text, phrases, limit=0, near=False, letters="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'"):
    """Return a copy of *text* with every word of each matched phrase X-ed out.

    A "word" is a maximal run of characters taken from *letters*; everything
    else (spaces, punctuation, digits, hyphens, ...) acts as a separator and
    is left untouched. Matching is case-insensitive and works on whole words.

    Args:
        text: The string to censor.
        phrases: Iterable of phrases to look for. Each phrase is split into
            words with the same rule as the text, so "self-preservation"
            matches the two consecutive words "self" and "preservation".
            Phrases that contain no word characters are ignored.
        limit: Number of leading matches (counted across all phrases, in text
            order) that are left uncensored before censoring starts.
        near: When true, the word immediately before and the word immediately
            after every censored word are censored as well.
        letters: Characters that are considered part of a word.

    Returns:
        The censored string. It has the same length as *text*; each character
        of a censored word is replaced by "X".
    """
    letter_set = set(letters)

    def split_words(string):
        """Return (lowercased words, (start, end) spans) found in *string*."""
        found_words = []
        found_spans = []
        start = None
        for index, char in enumerate(string):
            if char in letter_set:
                if start is None:
                    start = index
            elif start is not None:
                found_words.append(string[start:index].lower())
                found_spans.append((start, index))
                start = None
        if start is not None:
            # The string ends in the middle of a word.
            found_words.append(string[start:].lower())
            found_spans.append((start, len(string)))
        return found_words, found_spans

    words, spans = split_words(text)

    # Tokenize phrases exactly like the text so punctuation and capitals in a
    # phrase cannot prevent a match. Empty phrases are dropped because an
    # empty word list would "match" at every position.
    phrase_lists = [split_words(phrase)[0] for phrase in phrases]
    phrase_lists = [phrase_words for phrase_words in phrase_lists if phrase_words]

    censored = set()
    matches_seen = 0
    for position in range(len(words)):
        for phrase_words in phrase_lists:
            end = position + len(phrase_words)
            # A slice running past the end is shorter than the phrase and
            # therefore never compares equal.
            if words[position:end] == phrase_words:
                if matches_seen >= limit:
                    censored.update(range(position, end))
                matches_seen += 1
                # Only the first phrase matching at this position counts.
                break

    if near:
        last = len(words) - 1
        neighbours = set()
        for position in censored:
            if position > 0:
                neighbours.add(position - 1)
            if position < last:
                neighbours.add(position + 1)
        censored |= neighbours

    # Same-length slice replacement keeps every recorded span valid.
    chars = list(text)
    for position in censored:
        start, end = spans[position]
        chars[start:end] = "X" * (end - start)
    return "".join(chars)
def _read_file(path):
    """Return the full contents of the text file at *path*, closing it afterwards."""
    with open(path, "r") as handle:
        return handle.read()


# Raw e-mail texts to be censored.
email_one = _read_file("email_one.txt")
email_two = _read_file("email_two.txt")
email_three = _read_file("email_three.txt")
email_four = _read_file("email_four.txt")

# Phrases that must never appear in an outgoing e-mail.
proprietary_terms = ["she", "personality matrix", "sense of self", "self-preservation", "learning algorithm", "learning algorithms", "her", "herself"]
# Words with a negative tone.
negative_words = ["concerned", "behind", "danger", "dangerous", "alarming", "alarmed", "out of control", "help", "unhappy", "bad", "upset", "awful", "broken", "damage", "damaging", "dismal", "distressed", "distressing", "concerning", "horrible", "horribly", "questionable"]

# E-mail one: censor a single phrase.
email_one_censored = censor_text(email_one, ["learning algorithms"])
# E-mail two: censor every proprietary term.
email_two_censored = censor_text(email_two, proprietary_terms)
# E-mail three: censor proprietary terms, then negative words from the third
# negative match onwards (the first two matches are left in place).
email_three_censored = censor_text(censor_text(email_three, proprietary_terms), negative_words, 2)
# E-mail four: censor all listed phrases plus the word on either side of each.
email_four_censored = censor_text(email_four, negative_words + proprietary_terms, near=True)

# print(email_one_censored)
# print(email_two_censored)
# print(email_three_censored)
# print(email_four_censored)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment