Skip to content

Instantly share code, notes, and snippets.

@laffan
Last active February 24, 2021 21:37
Show Gist options
  • Save laffan/7b945d256028d2ffaacd4d99be40ca34 to your computer and use it in GitHub Desktop.
Save laffan/7b945d256028d2ffaacd4d99be40ca34 to your computer and use it in GitHub Desktop.
Kindle Note Regex (Python)
# Kindle Highlights/Notes Regex
# June 4 2017
# Credit to https://stackoverflow.com/q/16947390 for a solid start
import re
def parse_file(in_file):
    """Parse a Kindle highlights/notes export, printing each parsed entry.

    The file is split on the "==========" separator.  For every resulting
    chunk the raw text, the list of regex matches and a divider line are
    printed, in that order.

    Each match is a 13-tuple of strings:
    (title, author, "Location" or "page", position, weekday, month, day,
    year, hour, minute, second, "AM" or "PM", first line of the content).

    Args:
        in_file: Path of the export file to read.

    Returns:
        A list with one entry per chunk; each entry is the list of match
        tuples found in that chunk (empty when the chunk did not match).
    """
    # Regex pieces (raw strings so the backslashes reach the regex engine
    # untouched and do not trigger invalid-escape warnings).
    title_author_regex = r"(.+) \((.+)\)\r*\n"
    # [A-Za-z] instead of [A-z]: the latter also matches "[\]^_`".
    # [0-9-] instead of [1-9-\-]: the latter has no "0", which truncated
    # positions such as "1023-1025" to "1".
    location_regex = r".+?(?=Location|page)([A-Za-z]+)\s([0-9-]+)"
    date_regex = (
        r".+?(?=Added\son)Added\son\s"
        r"([a-zA-Z]+),\s([a-zA-Z]+)\s([0-9]+),\s([0-9]+)\s"
    )
    time_regex = r"([0-9]+):([0-9]+):([0-9]+)\s(AM|PM)"
    # Accept both CRLF and bare LF for the blank line before the content:
    # text-mode reads may already have normalised CRLF to LF.
    content_regex = r"\r*\n\r*\n(.*)"

    # Compile once, outside the loop; the pattern does not depend on the note.
    regex = re.compile(
        title_author_regex
        + location_regex
        + date_regex
        + time_regex
        + content_regex
    )

    # The context manager closes the file even if reading raises.
    with open(in_file, 'r') as read_file:
        raw_notes = read_file.read().split("==========")

    results = []
    for note in raw_notes:
        matches = regex.findall(note)
        # Single-argument print(...) prints the same on Python 2 and 3.
        print(note)
        print(matches)
        print("\n\n--------------------------\n\n")
        results.append(matches)
    return results
# Run the parser only when executed as a script, so importing this module
# does not try to open the data file as a side effect.
if __name__ == "__main__":
    parse_file("kindle-data.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment