codecademydev/script.py Secret

## script.py
# These are the emails you will be censoring. Each text file is read in full and its
# contents are stored in the variables below.
def _read_text(path):
    """Return the entire contents of the text file at *path*.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as the text has been read, instead of being left for the garbage collector.
    """
    with open(path, "r") as email_file:
        return email_file.read()


email_one = _read_text("email_one.txt")
email_two = _read_text("email_two.txt")
email_three = _read_text("email_three.txt")
email_four = _read_text("email_four.txt")

# Text that is substituted for every censored word or phrase.
redact_replacer = "[REDACTED]"

# Company-specific terms that must never appear in a released email.
proprietary_terms = [
    "she",
    "personality matrix",
    "sense of self",
    "self-preservation",
    "learning algorithm",
    "her",
    "herself",
]

# Words with a negative tone.
# The order is deliberate: a term that contains another term is listed before the
# shorter one (e.g. "dangerous" before "danger"). With in-order substring replacement
# the shorter term would otherwise be replaced first and leave "[REDACTED]ous" behind.
negative_words = [
    "concerned",
    "behind",
    "dangerous",
    "danger",
    "alarming",
    "alarmed",
    "out of control",
    "help",
    "unhappy",
    "bad",
    "upset",
    "awful",
    "broken",
    "damaging",
    "damage",
    "dismal",
    "distressed",
    "distressing",
    "concerning",
    "horrible",
    "horribly",
    "questionable",
]

# Terms are matched as whole words/phrases, so a short term such as "her" no longer
# clips longer words ("there" stays intact) and "herself" is redacted as one unit.
def redact_proprietary(document, proprietary_list, redacted_word):
    """Return *document* with every proprietary term replaced by *redacted_word*.

    Matching is case-insensitive and limited to whole words or phrases: a term
    only matches when it is not directly preceded or followed by a word
    character. Inflected forms (e.g. plurals) must therefore be listed
    explicitly if they should be redacted too.

    Args:
        document: The text to censor.
        proprietary_list: Iterable of words/phrases to redact. Empty strings
            are ignored.
        redacted_word: Replacement text, inserted literally.

    Returns:
        The censored text. The input is returned unchanged when there is
        nothing to search for.
    """
    import re  # local import: the script has no top-level import block

    # Longest terms first, so a phrase wins over any shorter term it starts with.
    terms = sorted(
        {term for term in proprietary_list if term},
        key=lambda term: (-len(term), term),
    )
    if not terms:
        return document

    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
        re.IGNORECASE,
    )
    # One pass over the original text: inserted replacements are never rescanned.
    # A callable replacement keeps any backslashes in redacted_word literal.
    return pattern.sub(lambda _match: redacted_word, document)

# print(redact_proprietary(document, proprietary_terms, redact_replacer))

# The first two negative words in the text are left alone; every later one is redacted.
# Terms are matched as whole words/phrases, so "help" does not clip "helpful".
def redact_negative(document, negatives_list, redacted_word, keep_first=2):
    """Redact negative terms in *document* after the first *keep_first* hits.

    The document is scanned once from left to right. Each whole-word,
    case-insensitive match of any term in *negatives_list* counts as one
    occurrence. The first *keep_first* occurrences are kept verbatim; every
    later occurrence is replaced by *redacted_word*.

    Args:
        document: The text to censor.
        negatives_list: Iterable of negative words/phrases. Empty strings are
            ignored.
        redacted_word: Replacement text, inserted literally.
        keep_first: Number of leading occurrences to leave untouched
            (defaults to 2, the original hard-coded threshold).

    Returns:
        The censored text.
    """
    import re  # local import: the script has no top-level import block

    # Longest first, so overlapping terms ("dangerous"/"danger") are matched,
    # and counted, exactly once at any position.
    terms = sorted(
        {term for term in negatives_list if term},
        key=lambda term: (-len(term), term),
    )
    if not terms:
        return document

    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
        re.IGNORECASE,
    )
    seen = 0

    def _keep_or_redact(match):
        # Only occurrences beyond the allowance are replaced; earlier ones are
        # returned unchanged, so a later hit can never rewrite an earlier one.
        nonlocal seen
        seen += 1
        return match.group(0) if seen <= keep_first else redacted_word

    return pattern.sub(_keep_or_redact, document)

# print(redact_negative(redact_proprietary(email_three, proprietary_terms, redact_replacer), negative_words, redact_replacer))

# Redacts every listed term together with the word directly before and after it.
# Works on word spans in the original text, so multi-word phrases, attached punctuation
# and line breaks are all handled without a placeholder character.
def redact_all(document, proprietary_list, negatives_list, redact_replacer):
    """Redact all listed terms plus the word on either side of each one.

    Terms from both lists are matched case-insensitively as whole words or
    phrases. Every word covered by a match, the word immediately before it and
    the word immediately after it are each replaced by *redact_replacer*.
    Whitespace (including newlines) and punctuation between words are kept
    exactly as they were. A match at the very start or end of the document
    simply has no neighbour on that side.

    Neighbouring words are picked by position only, so the word before/after
    may sit on an adjacent line or in an adjacent sentence.

    Args:
        document: The text to censor.
        proprietary_list: List of proprietary words/phrases.
        negatives_list: List of negative words/phrases.
        redact_replacer: Replacement text, inserted literally.

    Returns:
        The censored text.
    """
    import re  # local imports: the script has no top-level import block
    from bisect import bisect_left, bisect_right

    # Longest terms first, so a phrase wins over any shorter term it starts with.
    # Terms are expected to contain at least one word character.
    terms = sorted(
        {term for term in list(proprietary_list) + list(negatives_list) if term},
        key=lambda term: (-len(term), term),
    )
    if not terms:
        return document

    term_pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
        re.IGNORECASE,
    )

    # A "word" is a run of word characters, optionally joined by ' or - so that
    # "self-preservation" and "don't" count as one word each.
    word_spans = [found.span() for found in re.finditer(r"\w+(?:['-]\w+)*", document)]
    word_starts = [start for start, _ in word_spans]
    censored = [False] * len(word_spans)

    # All matches are located in the untouched text first, so redacting one term
    # can never hide a neighbouring term from the search.
    for match in term_pattern.finditer(document):
        # Index of the first word that overlaps the match.
        first = bisect_right(word_starts, match.start()) - 1
        if first < 0 or word_spans[first][1] <= match.start():
            first += 1
        # Index of the last word that starts before the match ends.
        last = bisect_left(word_starts, match.end()) - 1
        if last < first:
            continue
        # Widen by one word on each side, clamped to the document bounds.
        lowest = max(first - 1, 0)
        highest = min(last + 1, len(word_spans) - 1)
        for index in range(lowest, highest + 1):
            censored[index] = True

    # Rebuild the text, copying everything between censored words verbatim.
    pieces = []
    cursor = 0
    for (start, end), hit in zip(word_spans, censored):
        if hit:
            pieces.append(document[cursor:start])
            pieces.append(redact_replacer)
            cursor = end
    pieces.append(document[cursor:])
    return "".join(pieces)

# Show the fourth email exactly as it was read from email_four.txt ...
print(email_four)
# ... followed by the same email after redact_all has censored the proprietary terms
# and negative words, using "[REDACTED]" as the replacement text.
print(redact_all(email_four, proprietary_terms, negative_words, redact_replacer))
	# These are the emails you will be censoring. Each text file is read in full and its
	# contents are stored in the variables below.
	def _read_text(path):
	    """Return the entire contents of the text file at *path*.

	    The file is opened inside a ``with`` block so the handle is closed as soon
	    as the text has been read, instead of being left for the garbage collector.
	    """
	    with open(path, "r") as email_file:
	        return email_file.read()


	email_one = _read_text("email_one.txt")
	email_two = _read_text("email_two.txt")
	email_three = _read_text("email_three.txt")
	email_four = _read_text("email_four.txt")

	# Text that is substituted for every censored word or phrase.
	redact_replacer = "[REDACTED]"

	# Company-specific terms that must never appear in a released email.
	proprietary_terms = [
	    "she",
	    "personality matrix",
	    "sense of self",
	    "self-preservation",
	    "learning algorithm",
	    "her",
	    "herself",
	]

	# Words with a negative tone.
	# The order is deliberate: a term that contains another term is listed before the
	# shorter one (e.g. "dangerous" before "danger"). With in-order substring replacement
	# the shorter term would otherwise be replaced first and leave "[REDACTED]ous" behind.
	negative_words = [
	    "concerned",
	    "behind",
	    "dangerous",
	    "danger",
	    "alarming",
	    "alarmed",
	    "out of control",
	    "help",
	    "unhappy",
	    "bad",
	    "upset",
	    "awful",
	    "broken",
	    "damaging",
	    "damage",
	    "dismal",
	    "distressed",
	    "distressing",
	    "concerning",
	    "horrible",
	    "horribly",
	    "questionable",
	]

	# Terms are matched as whole words/phrases, so a short term such as "her" no longer
	# clips longer words ("there" stays intact) and "herself" is redacted as one unit.
	def redact_proprietary(document, proprietary_list, redacted_word):
	    """Return *document* with every proprietary term replaced by *redacted_word*.

	    Matching is case-insensitive and limited to whole words or phrases: a term
	    only matches when it is not directly preceded or followed by a word
	    character. Inflected forms (e.g. plurals) must therefore be listed
	    explicitly if they should be redacted too.

	    Args:
	        document: The text to censor.
	        proprietary_list: Iterable of words/phrases to redact. Empty strings
	            are ignored.
	        redacted_word: Replacement text, inserted literally.

	    Returns:
	        The censored text. The input is returned unchanged when there is
	        nothing to search for.
	    """
	    import re  # local import: the script has no top-level import block

	    # Longest terms first, so a phrase wins over any shorter term it starts with.
	    terms = sorted(
	        {term for term in proprietary_list if term},
	        key=lambda term: (-len(term), term),
	    )
	    if not terms:
	        return document

	    pattern = re.compile(
	        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
	        re.IGNORECASE,
	    )
	    # One pass over the original text: inserted replacements are never rescanned.
	    # A callable replacement keeps any backslashes in redacted_word literal.
	    return pattern.sub(lambda _match: redacted_word, document)

	# print(redact_proprietary(document, proprietary_terms, redact_replacer))

	# The first two negative words in the text are left alone; every later one is redacted.
	# Terms are matched as whole words/phrases, so "help" does not clip "helpful".
	def redact_negative(document, negatives_list, redacted_word, keep_first=2):
	    """Redact negative terms in *document* after the first *keep_first* hits.

	    The document is scanned once from left to right. Each whole-word,
	    case-insensitive match of any term in *negatives_list* counts as one
	    occurrence. The first *keep_first* occurrences are kept verbatim; every
	    later occurrence is replaced by *redacted_word*.

	    Args:
	        document: The text to censor.
	        negatives_list: Iterable of negative words/phrases. Empty strings are
	            ignored.
	        redacted_word: Replacement text, inserted literally.
	        keep_first: Number of leading occurrences to leave untouched
	            (defaults to 2, the original hard-coded threshold).

	    Returns:
	        The censored text.
	    """
	    import re  # local import: the script has no top-level import block

	    # Longest first, so overlapping terms ("dangerous"/"danger") are matched,
	    # and counted, exactly once at any position.
	    terms = sorted(
	        {term for term in negatives_list if term},
	        key=lambda term: (-len(term), term),
	    )
	    if not terms:
	        return document

	    pattern = re.compile(
	        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
	        re.IGNORECASE,
	    )
	    seen = 0

	    def _keep_or_redact(match):
	        # Only occurrences beyond the allowance are replaced; earlier ones are
	        # returned unchanged, so a later hit can never rewrite an earlier one.
	        nonlocal seen
	        seen += 1
	        return match.group(0) if seen <= keep_first else redacted_word

	    return pattern.sub(_keep_or_redact, document)

	# print(redact_negative(redact_proprietary(email_three, proprietary_terms, redact_replacer), negative_words, redact_replacer))

	# Redacts every listed term together with the word directly before and after it.
	# Works on word spans in the original text, so multi-word phrases, attached punctuation
	# and line breaks are all handled without a placeholder character.
	def redact_all(document, proprietary_list, negatives_list, redact_replacer):
	    """Redact all listed terms plus the word on either side of each one.

	    Terms from both lists are matched case-insensitively as whole words or
	    phrases. Every word covered by a match, the word immediately before it and
	    the word immediately after it are each replaced by *redact_replacer*.
	    Whitespace (including newlines) and punctuation between words are kept
	    exactly as they were. A match at the very start or end of the document
	    simply has no neighbour on that side.

	    Neighbouring words are picked by position only, so the word before/after
	    may sit on an adjacent line or in an adjacent sentence.

	    Args:
	        document: The text to censor.
	        proprietary_list: List of proprietary words/phrases.
	        negatives_list: List of negative words/phrases.
	        redact_replacer: Replacement text, inserted literally.

	    Returns:
	        The censored text.
	    """
	    import re  # local imports: the script has no top-level import block
	    from bisect import bisect_left, bisect_right

	    # Longest terms first, so a phrase wins over any shorter term it starts with.
	    # Terms are expected to contain at least one word character.
	    terms = sorted(
	        {term for term in list(proprietary_list) + list(negatives_list) if term},
	        key=lambda term: (-len(term), term),
	    )
	    if not terms:
	        return document

	    term_pattern = re.compile(
	        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)",
	        re.IGNORECASE,
	    )

	    # A "word" is a run of word characters, optionally joined by ' or - so that
	    # "self-preservation" and "don't" count as one word each.
	    word_spans = [found.span() for found in re.finditer(r"\w+(?:['-]\w+)*", document)]
	    word_starts = [start for start, _ in word_spans]
	    censored = [False] * len(word_spans)

	    # All matches are located in the untouched text first, so redacting one term
	    # can never hide a neighbouring term from the search.
	    for match in term_pattern.finditer(document):
	        # Index of the first word that overlaps the match.
	        first = bisect_right(word_starts, match.start()) - 1
	        if first < 0 or word_spans[first][1] <= match.start():
	            first += 1
	        # Index of the last word that starts before the match ends.
	        last = bisect_left(word_starts, match.end()) - 1
	        if last < first:
	            continue
	        # Widen by one word on each side, clamped to the document bounds.
	        lowest = max(first - 1, 0)
	        highest = min(last + 1, len(word_spans) - 1)
	        for index in range(lowest, highest + 1):
	            censored[index] = True

	    # Rebuild the text, copying everything between censored words verbatim.
	    pieces = []
	    cursor = 0
	    for (start, end), hit in zip(word_spans, censored):
	        if hit:
	            pieces.append(document[cursor:start])
	            pieces.append(redact_replacer)
	            cursor = end
	    pieces.append(document[cursor:])
	    return "".join(pieces)

	# Show the fourth email exactly as it was read from email_four.txt ...
	print(email_four)
	# ... followed by the same email after redact_all has censored the proprietary terms
	# and negative words, using "[REDACTED]" as the replacement text.
	print(redact_all(email_four, proprietary_terms, negative_words, redact_replacer))