# bitbutter/kindle_tana_paste_from_book_name.py

## kindle_tana_paste_from_book_name.py
import json
import sys
sys.stdout.reconfigure(encoding='utf-8')
import io
import os
import traceback
from datetime import datetime
import re
# Book to export, read from the ESPANSO_BOOKTITLE environment variable
# (a KeyError is raised when the variable is not set).
booktitle = os.environ['ESPANSO_BOOKTITLE']
# Hard-coded path of the "My Clippings.txt" file that is parsed for highlights.
clippings_file = "E:/documents/My Clippings.txt"

# Helper for get_book_highlights: builds the "5th" / "22nd" part of a formatted date.
def _ordinal_suffix(day):
    """Return the English ordinal suffix ("st", "nd", "rd" or "th") for a day number."""
    # 11, 12 and 13 are irregular: "11th", not "11st".
    if 11 <= day % 100 <= 13:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')


def get_book_highlights(book_title, clippings_path=None):
    """Collect the clippings of one book and return them in sorted order.

    The clippings file is split into entries on "==========" separator lines.
    An entry is selected when ``book_title`` occurs in its first line; its
    second line carries the metadata (page, "Added on ..." date) and its
    fourth line the highlighted text.

    Args:
        book_title: Text to look for in the first line of each entry,
            normally in the form "Title (Author)".
        clippings_path: Optional path of the clippings file. When omitted,
            the module-level ``clippings_file`` is used.

    Returns:
        A dict with the keys "title", "authors", "asin" (always None) and
        "highlights". Every highlight is a dict with "text", "location",
        "note", "timestamp", "page" (a digit string, or None when the
        metadata line has no page number) and "date" (for example
        "March 5th, 2023"). Highlights are sorted by page number, then by
        the time they were added; highlights without a page sort first.

    Raises:
        OSError: If the clippings file cannot be opened.
    """
    path = clippings_file if clippings_path is None else clippings_path
    with open(path, "r", encoding="utf-8") as f:
        contents = f.read()

    # Split "Title (Author)" into its parts; a title without a trailing
    # "(Author)" group is used as-is with an empty author.
    title_match = re.match(r'^(.+)\s\((.+)\)$', book_title)
    if title_match:
        title, author = title_match.groups()
    else:
        title, author = book_title, ""

    highlights = []
    for note in contents.split("==========\n"):
        lines = note.split("\n")
        # The text lives at index 3, so shorter entries carry nothing to export.
        # book_title is deliberately never reassigned here: the raw first line
        # may carry a BOM, which would stop later entries from matching.
        if len(lines) <= 3 or book_title not in lines[0]:
            continue
        meta_line = lines[1]

        date_str = meta_line.split("Added on ")[-1].strip()
        try:
            added = datetime.strptime(date_str, "%A, %B %d, %Y, %I:%M %p")
            timestamp = added.timestamp()
        except ValueError:
            # Entries whose date is not in the expected format are skipped.
            continue

        # Not every clipping has a page number; keep None in that case.
        page_match = re.search(r'Page (\d+)', meta_line, re.IGNORECASE)
        page_number = page_match.group(1) if page_match else None

        # Built by hand instead of with "%e", which is not a portable strftime
        # directive and drops the space in front of two-digit days.
        formatted_date = (
            f"{added.strftime('%B')} {added.day}"
            f"{_ordinal_suffix(added.day)}, {added.strftime('%Y')}"
        )

        text = lines[3]
        highlights.append({
            "text": text,
            "location": {
                "type": "kindle"
            },
            # A line containing "Note:" is additionally exposed as the note,
            # with the marker removed.
            "note": text.replace("Note:", "").strip() if "Note:" in text else None,
            "timestamp": timestamp,
            "page": page_number,
            "date": formatted_date,
        })

    highlights.sort(
        key=lambda h: (int(h['page']) if h['page'] is not None else 0, h['timestamp'])
    )

    return {
        "title": title,
        "authors": author,
        "asin": None,
        "highlights": highlights,
    }

def from_json_to_tanapaste(data) -> str:
    """Render a book dict as Tana Paste text.

    Args:
        data: Dict with the keys "title", "authors", "asin" and "highlights".
            Every highlight is a dict with "text", "location" (a dict that
            may contain "url"), "note", "page" and "date".

    Returns:
        The Tana Paste text: the "%%tana%%" marker, the book node with its
        fields, and one child node per highlight under "Highlights". Nesting
        is expressed with two spaces per level.
    """
    lines = [
        # Marker line that starts a Tana Paste block
        "%%tana%%",
        f"- {data['title']} #book",
        f"  - authors:: {data['authors']} #person",
    ]

    # The ASIN and the Amazon link derived from it are only emitted when known
    if data['asin'] is not None:
        lines.append(f"  - ASIN:: {data['asin']}")
        lines.append(f"  - url:: https://www.amazon.com/exec/obidos/ASIN/{data['asin']}")

    lines.append("  - Highlights")

    for highlight in data["highlights"]:
        lines.append(f"    - {highlight['text']} #[[kindle highlight]]")
        if 'url' in highlight['location']:
            lines.append(f"      - url:: {highlight['location']['url']}")
        if highlight["note"] is not None:
            lines.append(f"      - note:: {highlight['note']}")
        if highlight["page"] is not None:
            lines.append(f"      - page:: {highlight['page']}")
        # An empty date would render as the dangling reference "[[]]",
        # so it is skipped just like a missing (None) date.
        if highlight["date"]:
            lines.append(f"      - date:: [[{highlight['date']}]]")

    return "\n".join(lines)

# Collect and sort the highlights of the book named in booktitle.
book_data = get_book_highlights(booktitle)

# Round-trip the data through JSON text before rendering, so that
# from_json_to_tanapaste receives plain JSON-compatible values.
json_str = json.dumps(book_data, ensure_ascii=False)
tana_paste_str = from_json_to_tanapaste(json.loads(json_str))

# Write the Tana Paste text to stdout (reconfigured to UTF-8 at the top of the file).
print(tana_paste_str)
	import json
	import sys
	sys.stdout.reconfigure(encoding='utf-8')
	import io
	import os
	import traceback
	from datetime import datetime
	import re
	# Book to export, read from the ESPANSO_BOOKTITLE environment variable
	# (a KeyError is raised when the variable is not set).
	booktitle = os.environ['ESPANSO_BOOKTITLE']
	# Hard-coded path of the "My Clippings.txt" file that is parsed for highlights.
	clippings_file = "E:/documents/My Clippings.txt"

	# Helper for get_book_highlights: builds the "5th" / "22nd" part of a formatted date.
	def _ordinal_suffix(day):
		"""Return the English ordinal suffix ("st", "nd", "rd" or "th") for a day number."""
		# 11, 12 and 13 are irregular: "11th", not "11st".
		if 11 <= day % 100 <= 13:
			return 'th'
		return {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')


	def get_book_highlights(book_title, clippings_path=None):
		"""Collect the clippings of one book and return them in sorted order.

		The clippings file is split into entries on "==========" separator lines.
		An entry is selected when ``book_title`` occurs in its first line; its
		second line carries the metadata (page, "Added on ..." date) and its
		fourth line the highlighted text.

		Args:
			book_title: Text to look for in the first line of each entry,
				normally in the form "Title (Author)".
			clippings_path: Optional path of the clippings file. When omitted,
				the module-level ``clippings_file`` is used.

		Returns:
			A dict with the keys "title", "authors", "asin" (always None) and
			"highlights". Every highlight is a dict with "text", "location",
			"note", "timestamp", "page" (a digit string, or None when the
			metadata line has no page number) and "date" (for example
			"March 5th, 2023"). Highlights are sorted by page number, then by
			the time they were added; highlights without a page sort first.

		Raises:
			OSError: If the clippings file cannot be opened.
		"""
		path = clippings_file if clippings_path is None else clippings_path
		with open(path, "r", encoding="utf-8") as f:
			contents = f.read()

		# Split "Title (Author)" into its parts; a title without a trailing
		# "(Author)" group is used as-is with an empty author.
		title_match = re.match(r'^(.+)\s\((.+)\)$', book_title)
		if title_match:
			title, author = title_match.groups()
		else:
			title, author = book_title, ""

		highlights = []
		for note in contents.split("==========\n"):
			lines = note.split("\n")
			# The text lives at index 3, so shorter entries carry nothing to export.
			# book_title is deliberately never reassigned here: the raw first line
			# may carry a BOM, which would stop later entries from matching.
			if len(lines) <= 3 or book_title not in lines[0]:
				continue
			meta_line = lines[1]

			date_str = meta_line.split("Added on ")[-1].strip()
			try:
				added = datetime.strptime(date_str, "%A, %B %d, %Y, %I:%M %p")
				timestamp = added.timestamp()
			except ValueError:
				# Entries whose date is not in the expected format are skipped.
				continue

			# Not every clipping has a page number; keep None in that case.
			page_match = re.search(r'Page (\d+)', meta_line, re.IGNORECASE)
			page_number = page_match.group(1) if page_match else None

			# Built by hand instead of with "%e", which is not a portable strftime
			# directive and drops the space in front of two-digit days.
			formatted_date = (
				f"{added.strftime('%B')} {added.day}"
				f"{_ordinal_suffix(added.day)}, {added.strftime('%Y')}"
			)

			text = lines[3]
			highlights.append({
				"text": text,
				"location": {
					"type": "kindle"
				},
				# A line containing "Note:" is additionally exposed as the note,
				# with the marker removed.
				"note": text.replace("Note:", "").strip() if "Note:" in text else None,
				"timestamp": timestamp,
				"page": page_number,
				"date": formatted_date,
			})

		highlights.sort(
			key=lambda h: (int(h['page']) if h['page'] is not None else 0, h['timestamp'])
		)

		return {
			"title": title,
			"authors": author,
			"asin": None,
			"highlights": highlights,
		}

	def from_json_to_tanapaste(data) -> str:
		"""Render a book dict as Tana Paste text.

		Args:
			data: Dict with the keys "title", "authors", "asin" and "highlights".
				Every highlight is a dict with "text", "location" (a dict that
				may contain "url"), "note", "page" and "date".

		Returns:
			The Tana Paste text: the "%%tana%%" marker, the book node with its
			fields, and one child node per highlight under "Highlights". Nesting
			is expressed with two spaces per level.
		"""
		lines = [
			# Marker line that starts a Tana Paste block
			"%%tana%%",
			f"- {data['title']} #book",
			f"  - authors:: {data['authors']} #person",
		]

		# The ASIN and the Amazon link derived from it are only emitted when known
		if data['asin'] is not None:
			lines.append(f"  - ASIN:: {data['asin']}")
			lines.append(f"  - url:: https://www.amazon.com/exec/obidos/ASIN/{data['asin']}")

		lines.append("  - Highlights")

		for highlight in data["highlights"]:
			# Highlights sit one level below "Highlights", their fields one level deeper
			lines.append(f"    - {highlight['text']} #[[kindle highlight]]")
			if 'url' in highlight['location']:
				lines.append(f"      - url:: {highlight['location']['url']}")
			if highlight["note"] is not None:
				lines.append(f"      - note:: {highlight['note']}")
			if highlight["page"] is not None:
				lines.append(f"      - page:: {highlight['page']}")
			# An empty date would render as the dangling reference "[[]]",
			# so it is skipped just like a missing (None) date.
			if highlight["date"]:
				lines.append(f"      - date:: [[{highlight['date']}]]")

		return "\n".join(lines)

	# Collect and sort the highlights of the book named in booktitle.
	book_data = get_book_highlights(booktitle)

	# Round-trip the data through JSON text before rendering, so that
	# from_json_to_tanapaste receives plain JSON-compatible values.
	json_str = json.dumps(book_data, ensure_ascii=False)
	tana_paste_str = from_json_to_tanapaste(json.loads(json_str))

	# Write the Tana Paste text to stdout.
	print(tana_paste_str)