# codecademydev/script.py Secret

## script.py
# These are the emails you will be censoring. The open() function opens the text file that contains each email, and the .read() method lets us save its contents to the following variables:
# Read each email's full text. The `with` blocks close every file handle as
# soon as its contents have been read instead of leaving it open.
with open("email_one.txt", "r") as email_file:
  email_one = email_file.read()
with open("email_two.txt", "r") as email_file:
  email_two = email_file.read()
with open("email_three.txt", "r") as email_file:
  email_three = email_file.read()
with open("email_four.txt", "r") as email_file:
  email_four = email_file.read()


######task 2##############

#first function for censoring just one forbidden phrase out of the list#

def censorer1(forbidden_phrase, text):
  """Return a copy of *text* with each exact occurrence of *forbidden_phrase* replaced by "CENSORED".

  The match is case-sensitive and also hits occurrences inside longer words.
  """
  return text.replace(forbidden_phrase, "CENSORED")

#print(censorer1("learning algorithms", email_one))


########task3###############

#second function to censor all the words in the proprietary_terms list.


# Words and phrases that must never appear in a released email.
proprietary_terms = [
  "she",
  "personality matrix",
  "sense of self",
  "self-preservation",
  "learning algorithm",
  "her",
  "herself",
]


def censorer2(forbidden_lst, text):
  """Return *text* with every term from *forbidden_lst* replaced by "CENSORED".

  Terms are matched case-insensitively and only as whole words or phrases
  (an optional trailing plural "s" is swallowed too), so "She" and
  "learning algorithms" are censored while "finished" and "there" are not.

  Args:
    forbidden_lst: Iterable of words/phrases to censor; empty strings are ignored.
    text: The text to censor.

  Returns:
    The censored copy of *text*.
  """
  import re  # imported locally because the file has no module-level imports

  # Longest first, so a phrase wins over a shorter term that begins the same way.
  terms = sorted((term for term in forbidden_lst if term), key=len, reverse=True)
  if not terms:
    return text

  alternatives = "|".join(re.escape(term) for term in terms)
  # The lookarounds require a non-word character (or the text edge) on both sides.
  pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")s?(?!\w)", re.IGNORECASE)
  # A single pass means already-censored text is never rescanned.
  return pattern.sub("CENSORED", text)

#print(censorer2(proprietary_terms, email_two))

############task4################

#Third function including the same code as censorer2 at the beginning. You can find the descriptions for the steps to censor the words of the negative_words list right above each step.

# Negative-sentiment words and phrases ("distressed" is listed twice).
negative_words = [
  "concerned",
  "behind",
  "danger",
  "dangerous",
  "alarming",
  "alarmed",
  "out of control",
  "help",
  "unhappy",
  "bad",
  "upset",
  "awful",
  "broken",
  "damage",
  "damaging",
  "dismal",
  "distressed",
  "distressed",
  "concerning",
  "horrible",
  "horribly",
  "questionable",
]

#same procedure as in censorer2
def censorer3(text, forbidden_lst, negative_lst):
  """Censor forbidden terms, and negative words from their third occurrence on.

  Every term in *forbidden_lst* is replaced by "CENSORED". Occurrences of
  terms in *negative_lst* are counted in reading order across the whole list:
  the first two are left untouched, and every later one is replaced by a run
  of "X" of the same length, so the text layout is preserved.

  Matching is case-insensitive and whole-word (an optional trailing plural
  "s" is included); multi-word phrases such as "out of control" are handled.

  Args:
    text: The text to censor.
    forbidden_lst: Iterable of terms that are always censored.
    negative_lst: Iterable of negative words/phrases.

  Returns:
    The censored copy of *text*. If no negative word occurs, only the
    forbidden terms are censored.
  """
  import re  # imported locally because the file has no module-level imports

  def compile_terms(terms):
    """Return one pattern matching any of *terms*, or None if there are none."""
    # Longest first, so a phrase wins over a shorter term that begins the same way.
    ordered = sorted((term for term in terms if term), key=len, reverse=True)
    if not ordered:
      return None
    alternatives = "|".join(re.escape(term) for term in ordered)
    return re.compile(r"(?<!\w)(?:" + alternatives + r")s?(?!\w)", re.IGNORECASE)

  censored_text = text
  forbidden_pattern = compile_terms(forbidden_lst)
  if forbidden_pattern is not None:
    censored_text = forbidden_pattern.sub("CENSORED", censored_text)

  negative_pattern = compile_terms(negative_lst)
  if negative_pattern is None:
    return censored_text

  occurrences = 0

  def mask_after_second(match):
    """Keep the first two negative matches; X-out every later one."""
    nonlocal occurrences
    occurrences += 1
    if occurrences <= 2:
      return match.group(0)
    return "X" * len(match.group(0))

  return negative_pattern.sub(mask_after_second, censored_text)

#print(censorer3(email_three, proprietary_terms, negative_words))
#print(email_three)


##########
################
#########task5################

#I use the code of the previous task 4, now named censorer4. Changes are described in the descriptions (I deleted the old ones)

def censorer4(text, forbidden_lst, negative_lst):
  """Censor every forbidden term and every negative word in *text*.

  Every term in *forbidden_lst* is replaced by "CENSORED". Every occurrence
  of every term in *negative_lst* (including the first two, unlike
  censorer3) is replaced by a run of "X" of the same length.

  Matching is case-insensitive and whole-word (an optional trailing plural
  "s" is included); multi-word phrases such as "out of control" are handled,
  and repeated occurrences of the same word are all masked.

  Args:
    text: The text to censor.
    forbidden_lst: Iterable of terms that are always censored.
    negative_lst: Iterable of negative words/phrases.

  Returns:
    The censored copy of *text*.
  """
  import re  # imported locally because the file has no module-level imports

  def compile_terms(terms):
    """Return one pattern matching any of *terms*, or None if there are none."""
    # Longest first, so a phrase wins over a shorter term that begins the same way.
    ordered = sorted((term for term in terms if term), key=len, reverse=True)
    if not ordered:
      return None
    alternatives = "|".join(re.escape(term) for term in ordered)
    return re.compile(r"(?<!\w)(?:" + alternatives + r")s?(?!\w)", re.IGNORECASE)

  censored_text = text
  forbidden_pattern = compile_terms(forbidden_lst)
  if forbidden_pattern is not None:
    censored_text = forbidden_pattern.sub("CENSORED", censored_text)

  negative_pattern = compile_terms(negative_lst)
  if negative_pattern is None:
    return censored_text

  # Same-length masking keeps the rest of the text aligned with the original.
  return negative_pattern.sub(lambda match: "X" * len(match.group(0)), censored_text)

# Run the task-5 censor over the fourth email and show the result.
censored_email_four = censorer4(email_four, proprietary_terms, negative_words)
print(censored_email_four)
#print(email_three)
	# These are the emails you will be censoring. The open() function opens the text file that contains each email, and the .read() method lets us save its contents to the following variables:
	# Read each email's full text. The `with` blocks close every file handle as
	# soon as its contents have been read instead of leaving it open.
	with open("email_one.txt", "r") as email_file:
	  email_one = email_file.read()
	with open("email_two.txt", "r") as email_file:
	  email_two = email_file.read()
	with open("email_three.txt", "r") as email_file:
	  email_three = email_file.read()
	with open("email_four.txt", "r") as email_file:
	  email_four = email_file.read()


	######task 2##############

	#first function for censoring just one forbidden phrase out of the list#

	def censorer1(forbidden_phrase, text):
	  """Return a copy of *text* with each exact occurrence of *forbidden_phrase* replaced by "CENSORED".

	  The match is case-sensitive and also hits occurrences inside longer words.
	  """
	  # The body is indented under the `def` again; it had been flattened to
	  # the same column, which Python rejects.
	  return text.replace(forbidden_phrase, "CENSORED")

	#print(censorer1("learning algorithms", email_one))


	########task3###############

	#second function to censor all the words in the proprietary_terms list.


	# Words and phrases that must never appear in a released email.
	proprietary_terms = [
	  "she",
	  "personality matrix",
	  "sense of self",
	  "self-preservation",
	  "learning algorithm",
	  "her",
	  "herself",
	]


	def censorer2(forbidden_lst, text):
	  """Return *text* with every term from *forbidden_lst* replaced by "CENSORED".

	  Terms are matched case-insensitively and only as whole words or phrases
	  (an optional trailing plural "s" is swallowed too), so "She" and
	  "learning algorithms" are censored while "finished" and "there" are not.

	  Args:
	    forbidden_lst: Iterable of words/phrases to censor; empty strings are ignored.
	    text: The text to censor.

	  Returns:
	    The censored copy of *text*.
	  """
	  import re  # imported locally because the file has no module-level imports

	  # Longest first, so a phrase wins over a shorter term that begins the same way.
	  terms = sorted((term for term in forbidden_lst if term), key=len, reverse=True)
	  if not terms:
	    return text

	  alternatives = "|".join(re.escape(term) for term in terms)
	  # The lookarounds require a non-word character (or the text edge) on both sides.
	  pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")s?(?!\w)", re.IGNORECASE)
	  # A single pass means already-censored text is never rescanned.
	  return pattern.sub("CENSORED", text)

	#print(censorer2(proprietary_terms, email_two))

	############task4################

	#Third function including the same code as censorer2 at the beginning. You can find the descriptions for the steps to censor the words of the negative_words list right above each step.

	# Negative-sentiment words and phrases ("distressed" is listed twice).
	negative_words = [
	  "concerned",
	  "behind",
	  "danger",
	  "dangerous",
	  "alarming",
	  "alarmed",
	  "out of control",
	  "help",
	  "unhappy",
	  "bad",
	  "upset",
	  "awful",
	  "broken",
	  "damage",
	  "damaging",
	  "dismal",
	  "distressed",
	  "distressed",
	  "concerning",
	  "horrible",
	  "horribly",
	  "questionable",
	]

	#same procedure as in censorer2
	def censorer3(text, forbidden_lst, negative_lst):
	  """Censor forbidden terms, and negative words from their third occurrence on.

	  Every term in *forbidden_lst* is replaced by "CENSORED". Occurrences of
	  terms in *negative_lst* are counted in reading order across the whole list:
	  the first two are left untouched, and every later one is replaced by a run
	  of "X" of the same length, so the text layout is preserved.

	  Matching is case-insensitive and whole-word (an optional trailing plural
	  "s" is included); multi-word phrases such as "out of control" are handled.

	  Args:
	    text: The text to censor.
	    forbidden_lst: Iterable of terms that are always censored.
	    negative_lst: Iterable of negative words/phrases.

	  Returns:
	    The censored copy of *text*. If no negative word occurs, only the
	    forbidden terms are censored.
	  """
	  import re  # imported locally because the file has no module-level imports

	  def compile_terms(terms):
	    """Return one pattern matching any of *terms*, or None if there are none."""
	    # Longest first, so a phrase wins over a shorter term that begins the same way.
	    ordered = sorted((term for term in terms if term), key=len, reverse=True)
	    if not ordered:
	      return None
	    alternatives = "|".join(re.escape(term) for term in ordered)
	    return re.compile(r"(?<!\w)(?:" + alternatives + r")s?(?!\w)", re.IGNORECASE)

	  censored_text = text
	  forbidden_pattern = compile_terms(forbidden_lst)
	  if forbidden_pattern is not None:
	    censored_text = forbidden_pattern.sub("CENSORED", censored_text)

	  negative_pattern = compile_terms(negative_lst)
	  if negative_pattern is None:
	    return censored_text

	  occurrences = 0

	  def mask_after_second(match):
	    """Keep the first two negative matches; X-out every later one."""
	    nonlocal occurrences
	    occurrences += 1
	    if occurrences <= 2:
	      return match.group(0)
	    return "X" * len(match.group(0))

	  return negative_pattern.sub(mask_after_second, censored_text)

	#print(censorer3(email_three, proprietary_terms, negative_words))
	#print(email_three)


	##########
	################
	#########task5################

	#I use the code of the previous task 4, now named censorer4. Changes are described in the descriptions (I deleted the old ones)

	def censorer4(text, forbidden_lst, negative_lst):
	  """Censor every forbidden term and every negative word in *text*.

	  Every term in *forbidden_lst* is replaced by "CENSORED". Every occurrence
	  of every term in *negative_lst* (including the first two, unlike
	  censorer3) is replaced by a run of "X" of the same length.

	  Matching is case-insensitive and whole-word (an optional trailing plural
	  "s" is included); multi-word phrases such as "out of control" are handled,
	  and repeated occurrences of the same word are all masked.

	  Args:
	    text: The text to censor.
	    forbidden_lst: Iterable of terms that are always censored.
	    negative_lst: Iterable of negative words/phrases.

	  Returns:
	    The censored copy of *text*.
	  """
	  import re  # imported locally because the file has no module-level imports

	  def compile_terms(terms):
	    """Return one pattern matching any of *terms*, or None if there are none."""
	    # Longest first, so a phrase wins over a shorter term that begins the same way.
	    ordered = sorted((term for term in terms if term), key=len, reverse=True)
	    if not ordered:
	      return None
	    alternatives = "|".join(re.escape(term) for term in ordered)
	    return re.compile(r"(?<!\w)(?:" + alternatives + r")s?(?!\w)", re.IGNORECASE)

	  censored_text = text
	  forbidden_pattern = compile_terms(forbidden_lst)
	  if forbidden_pattern is not None:
	    censored_text = forbidden_pattern.sub("CENSORED", censored_text)

	  negative_pattern = compile_terms(negative_lst)
	  if negative_pattern is None:
	    return censored_text

	  # Same-length masking keeps the rest of the text aligned with the original.
	  return negative_pattern.sub(lambda match: "X" * len(match.group(0)), censored_text)

	# Run the task-5 censor over the fourth email and show the result.
	censored_email_four = censorer4(email_four, proprietary_terms, negative_words)
	print(censored_email_four)
	#print(email_three)