# ChrisVilches/kindle_highlight_clean.py

## kindle_highlight_clean.py
import sys
import re

# TODO: The main code should be inside a __main__, I think.

FILE = sys.argv[1]

file = open(FILE, "r", encoding="utf8")
data = file.read()
file.close()

if (
    ("class='bookTitle'" not in data)
    or ("class='noteHeading'" not in data)
    or (not FILE.endswith(".html"))
):
    raise Exception("It seems this file is not a Kindle highlight HTML file.")

original_data = data

replacement_rules = {
    " .": ".",
    " ,": ",",
    "( ": "(",
    " )": ")",
    " :": ":",
    " ;": ";",
    " ?": "?",
    " !": "!",
    " - ": "-",
    " / ": "/",
    "“ ": "“",
    " ”": "”",
}

replacement_rules_regex = {
    "# ([0-9])": "#\\1"
}

for key, value in replacement_rules.items():
    data = data.replace(key, value)

for key, value in replacement_rules_regex.items():
    data = re.sub(key, value, data)

if original_data == data:
    print("Data didn't change.")
else:
    print("Data changed.")
    file = open(FILE, "w", encoding="utf8")
    file.write(data)
    file.close()

print("OK")
	import sys
	import re

	# TODO: The main code should be inside a __main__, I think.

	FILE = sys.argv[1]

	file = open(FILE, "r", encoding="utf8")
	data = file.read()
	file.close()

	if (
	("class='bookTitle'" not in data)
	or ("class='noteHeading'" not in data)
	or (not FILE.endswith(".html"))
	):
	raise Exception("It seems this file is not a Kindle highlight HTML file.")

	original_data = data

	replacement_rules = {
	" .": ".",
	" ,": ",",
	"( ": "(",
	" )": ")",
	" :": ":",
	" ;": ";",
	" ?": "?",
	" !": "!",
	" - ": "-",
	" / ": "/",
	"“ ": "“",
	" ”": "”",
	}

	replacement_rules_regex = {
	"# ([0-9])": "#\\1"
	}

	for key, value in replacement_rules.items():
	data = data.replace(key, value)

	for key, value in replacement_rules_regex.items():
	data = re.sub(key, value, data)

	if original_data == data:
	print("Data didn't change.")
	else:
	print("Data changed.")
	file = open(FILE, "w", encoding="utf8")
	file.write(data)
	file.close()

	print("OK")