murphybread/automation.py

## automation.py
import os
import json
import shutil
import re

import convert_md_to_json


# Load the JSON file containing the directory structure
def load_json_structure(file_path):
    """Load and return the parsed JSON document stored at ``file_path``.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so the same file is read identically on every system.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The value produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)


# Create directories based on the JSON structure
def create_directories(base_path, structure):
    """Create the category folder tree described by ``structure``.

    Every key of ``structure["MajorCategories"]`` becomes a folder under
    ``base_path``; each key of that entry's ``"MinorCategories"`` becomes a
    folder inside it, and each key of the minor's ``"Subcategories"`` becomes
    a folder one level deeper.  Folders that already exist are left as is.

    Args:
        base_path: Directory under which the tree is created.
        structure: Mapping with a ``"MajorCategories"`` key, nested as above.
    """
    for major_name, major_entry in structure["MajorCategories"].items():
        major_path = os.path.join(base_path, major_name)
        os.makedirs(major_path, exist_ok=True)

        minors = major_entry.get("MinorCategories", {})
        for minor_name, minor_entry in minors.items():
            minor_path = os.path.join(major_path, minor_name)
            os.makedirs(minor_path, exist_ok=True)

            for sub_name in minor_entry.get("Subcategories", {}):
                os.makedirs(os.path.join(minor_path, sub_name), exist_ok=True)


def move_files_from_Entrance(Entrance_path, base_path, structure):
    """Move each Markdown file found directly in ``Entrance_path`` to its category folder.

    The destination of every ``*.md`` file is looked up with
    ``determine_new_path``.  Files for which no destination is found are left
    where they are.

    Args:
        Entrance_path: Directory whose top-level ``*.md`` files are processed.
        base_path: Root directory passed on to ``determine_new_path``.
        structure: Category structure passed on to ``determine_new_path``.

    Returns:
        None.  Progress is reported with ``print``.
    """
    md_files = [name for name in os.listdir(Entrance_path) if name.endswith(".md")]
    if not md_files:
        print("No books to work on.")
        return

    files_moved = 0  # Counter for the number of files moved

    for file in md_files:
        new_path = determine_new_path(file, structure, base_path)
        if not new_path:
            # No category matched (determine_new_path already reported it);
            # do not announce a move that will not happen.
            continue

        source_path = os.path.join(Entrance_path, file)
        print(f"Trying to move: {source_path} to {new_path}")
        shutil.move(source_path, new_path)
        print(f"Moved {file} to {new_path}")
        files_moved += 1

    if files_moved == 0:
        print("No books moved.")


def determine_new_path(file_name, structure, base_path):
    """Return the destination path for ``file_name``, or ``None`` if nothing matches.

    The code used for matching is the file name without its trailing ``.md``,
    cut at the first space (so ``"110.01 a.md"`` is matched as ``"110.01"``).
    The structure is searched major category first, then that major's minor
    categories, then each minor's subcategories; the first key equal to the
    code wins and the file is placed in the corresponding folder.

    Args:
        file_name: Name of the Markdown file (not a full path).
        structure: Mapping with a ``"MajorCategories"`` key.
        base_path: Root directory of the category folders.

    Returns:
        The joined destination path including ``file_name``, or ``None``.
    """
    # Strip only the extension; str.replace would also remove any ".md"
    # that happens to occur inside the name.
    stem = file_name[: -len(".md")] if file_name.endswith(".md") else file_name
    parts = stem.split(" ")
    subcategory_code = parts[0]
    book_suffix = parts[1] if len(parts) > 1 else None

    print(f"Processing file: {file_name}")
    print(f"Subcategory code: {subcategory_code}, Book suffix: {book_suffix}")

    for major_key, major_val in structure["MajorCategories"].items():
        if subcategory_code == major_key:
            path = os.path.join(base_path, major_key, file_name)
            print(f"Matched major category. Path: {path}")
            return path

        for minor_key, minor_val in major_val.get("MinorCategories", {}).items():
            if subcategory_code == minor_key:
                path = os.path.join(base_path, major_key, minor_key, file_name)
                print(f"Matched minor category. Path: {path}")
                return path

            for sub_key in minor_val.get("Subcategories", {}):
                # Book files ("<sub> <letter>.md") land here too, because the
                # code is the text before the first space.
                if sub_key == subcategory_code:
                    sub_dir = os.path.join(base_path, major_key, minor_key, sub_key)
                    path = os.path.join(sub_dir, file_name)
                    print(f"Matched subcategory/book. Path: {path}")
                    return path

    print(f"No matching path found for: {file_name}")
    return None


# Function to add tags to Markdown files
def add_tags_to_md_files(base_path, json_structure):
    """Insert a category tag line after the ``---`` header of Markdown files.

    Walks ``base_path`` and, in every directory whose path contains
    ``"Entrance"``, processes each ``*.md`` file: the tag built by
    ``construct_tag`` is written on its own line directly after the second
    ``---`` marker.  A file is left unchanged when no tag can be built, when
    it does not contain two ``---`` markers, or when that exact tag line is
    already in place (so repeated runs do not stack duplicates).

    Args:
        base_path: Directory tree to walk.
        json_structure: Category structure passed on to ``construct_tag``.

    Returns:
        None.  Progress is reported with ``print``.
    """
    print('start add_tags_to_md files')
    for root, _dirs, files in os.walk(base_path):
        if "Entrance" not in root:  # Only directories under an Entrance path are processed
            continue
        for file in files:
            if not file.endswith(".md"):
                continue
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r+", encoding="utf-8") as f:
                    content = f.read()

                    new_tag = construct_tag(file, json_structure)
                    print(f'new_tag: {new_tag}')

                    if not new_tag:
                        print(f"No tag could be constructed for {file}, skipping.")
                        continue

                    parts = content.split("---", 2)
                    if len(parts) != 3:
                        # Fewer than two "---" markers: no complete header to tag.
                        print(f"No header found in {file}, skipping.")
                        continue

                    header, middle, body = parts
                    if body.startswith(f"\n{new_tag}\n"):
                        print(f"Tag already present in {file}, skipping.")
                        continue

                    new_content = f"{header}---{middle}---\n{new_tag}\n{body}"
                    f.seek(0)  # Overwrite from the start of the file
                    f.write(new_content)
                    f.truncate()  # Remove the rest of the old content
                    print(f"Added tag to {file}")

            except UnicodeDecodeError as e:
                print(f"Error reading {file}: {e}")


# Build the tag string for a Markdown file from its call-number file name
def construct_tag(file_name, json_structure):
    """Build the tag string for a call-number Markdown file name.

    Recognised names are ``X00.md`` (major), ``XYZ.md`` with a non-zero
    middle digit (minor), ``XYZ.NN.md`` (subcategory) and ``XYZ.NN L.md``
    (book, ``L`` a single letter).  For each level the name implies, a piece
    ``#[[<code>]]#<title>`` is appended, the title being looked up in
    ``json_structure`` with spaces replaced by underscores (empty when the
    code is not present there).

    Args:
        file_name: Name of the Markdown file (not a full path).
        json_structure: Mapping with a ``"MajorCategories"`` key.

    Returns:
        The concatenated tag, or ``""`` when the name matches no pattern.
    """
    print("start cont+++++++++++++")
    print(f'file_name:{file_name}')

    # Index in this tuple == depth of the level (0 major ... 3 book).
    level_patterns = (
        re.compile(r'^([0-9]{1}00)\.md$'),
        re.compile(r'^([0-9]{1}[1-9][0-9])\.md$'),
        re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\.md$'),
        re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\s([a-z])\.md$', re.IGNORECASE),
    )

    major_code = minor_code = sub_code = book_code = ""
    for depth, pattern in enumerate(level_patterns):
        found = pattern.match(file_name)
        if found is None:
            continue
        captured = found.groups()
        # The major code is the first digit followed by "00" at every depth.
        major_code = captured[0][:1] + '00'
        if depth >= 1:
            minor_code = captured[0]
        if depth >= 2:
            sub_code = captured[1]
        if depth >= 3:
            book_code = captured[2]
        break

    pieces = []
    if major_code:
        major_info = json_structure.get("MajorCategories", {}).get(major_code, {})
        pieces.append(f"#[[{major_code}]]#{major_info.get('title', '').replace(' ', '_')}")
        if minor_code:
            minor_info = major_info.get("MinorCategories", {}).get(minor_code, {})
            pieces.append(f"#[[{minor_code}]]#{minor_info.get('title', '').replace(' ', '_')}")
            if sub_code:
                sub_info = minor_info.get("Subcategories", {}).get(f"{minor_code}.{sub_code}", {})
                pieces.append(f"#[[{minor_code}.{sub_code}]]#{sub_info.get('title', '').replace(' ', '_')}")
                if book_code:
                    book_info = sub_info.get("Books", {}).get(f"{minor_code}.{sub_code} {book_code}", "")
                    pieces.append(f"#[[{minor_code}.{sub_code} {book_code}]]#{book_info.replace(' ', '_')}")

    print(major_code, minor_code , sub_code, book_code)

    return "".join(pieces)


# Ensure that we are in the correct directory to prevent affecting other directories
current_dir = os.getcwd()
expected_dir_name = "Library"

# Guard: refuse to run unless "Library" appears somewhere in the working
# directory path, because the steps below create folders and move files
# relative to the working directory's parent.
if expected_dir_name not in current_dir:
    print(
        f"Error: Current directory {current_dir} is not '{expected_dir_name}'. Exiting script."
    )
    # Leave with a non-zero status so callers can tell the run was refused;
    # a bare exit() reports success and relies on the site-provided builtin.
    raise SystemExit(1)


# Main execution
# All category folders are created in the parent of the working directory.
base_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
json_file_name = "structure.json"


# Step 1: regenerate structure.json from the Markdown index.
md_file = "../Entrance/Call Number Index.md"
json_file = "structure.json"

print(f"Read from {md_file}")
convert_md_to_json.md_to_json(md_file, json_file)
print(f"Output json file is {json_file}")

# Step 2: load the freshly written structure.
json_structure = load_json_structure(json_file_name)
Entrance_directory = os.path.join(base_directory, "Entrance")


print(f"json_file_name: {json_file_name}")
print(f"base_directory: {base_directory}")
print(f"Entrance_directory: {Entrance_directory}")
print("*--------------------*")

# Step 3: create the folders, tag the files waiting in Entrance, then move
# them.  Tagging runs before moving because it only looks inside Entrance.
create_directories(base_directory, json_structure)
add_tags_to_md_files(Entrance_directory, json_structure)
move_files_from_Entrance(
    Entrance_directory, base_directory, json_structure
)


## convert_md_to_json.py
import re
import json
import shutil
import os


def md_to_json(md_file, json_file):
    """Convert the call-number index in ``md_file`` into a nested JSON file.

    Each non-empty line that does not start with ``---`` is matched against
    four list-item shapes:

    * ``- [[D0D]] title``      -> new major category (middle digit ``0``)
    * ``- [[DND]] title``      -> minor category under the current major
      (middle digit ``1``-``9``)
    * ``- [[DDD.DD]] title``   -> subcategory under the current minor
    * ``- [[DDD.DD L]] title`` -> book under the current subcategory

    Lines matching none of them are ignored.  An entry whose parent has not
    been seen yet is reported with ``print`` and skipped.

    Args:
        md_file: Path of the Markdown index, read as UTF-8 (the encoding the
            tagging step uses for the same files).
        json_file: Output path; a relative path is resolved against the
            current working directory.

    Returns:
        None.  The structure is written to ``json_file``.
    """
    with open(md_file, "r", encoding="utf-8") as file:
        lines = file.readlines()

    json_structure = {"MajorCategories": {}}
    majors = json_structure["MajorCategories"]
    current_major = current_minor = current_sub = None

    for i, line in enumerate(lines):
        line = line.strip()  # Remove leading and trailing whitespaces
        if not line or line.startswith("---"):
            continue  # Skip empty lines and metadata lines

        print(f"Processing line {i}: {line}")  # Debug print

        # Match major, minor, subcategories, and book entries
        major_match = re.match(r"- \[\[(\d0\d)\]\]\s*(.*)", line)
        minor_match = re.match(r"- \[\[(\d[1-9]\d)\]\]\s*(.*)", line)
        sub_match = re.match(r"- \[\[(\d{3}\.\d{2})\]\]\s*(.*)", line)
        book_match = re.match(r"- \[\[(\d{3}\.\d{2} [a-zA-Z])\]\]\s*(.*)", line)

        try:
            if major_match:
                current_major, title = major_match.groups()
                majors[current_major] = {
                    "value": current_major,
                    "title": title,
                    "MinorCategories": {},
                }
                print(f"Major Category: {current_major}, Title: {title}")  # Debug print

            elif minor_match:
                current_minor, title = minor_match.groups()
                majors[current_major]["MinorCategories"][current_minor] = {
                    "title": title,
                    "Subcategories": {},
                }
                print(f"Minor Category: {current_minor}, Title: {title}")  # Debug print

            elif sub_match:
                current_sub, title = sub_match.groups()
                minors = majors[current_major]["MinorCategories"]
                minors[current_minor]["Subcategories"][current_sub] = {
                    "title": title,
                    "Books": {},
                }
                print(f"Subcategory: {current_sub}, Title: {title}")  # Debug print

            elif book_match:
                book_code, book_title = book_match.groups()
                minors = majors[current_major]["MinorCategories"]
                subs = minors[current_minor]["Subcategories"]
                subs[current_sub]["Books"][book_code] = book_title
                print(f"Book: {book_code}, Title: {book_title}")  # Debug print

        except KeyError as e:
            # The parent level of this entry is missing from the structure.
            print(f"Error processing line {i}: '{line}'")
            print(str(e))

    # Write straight to the final location.  json.dump keeps its default
    # ASCII-only escaping, so the file can be read back under any encoding.
    destination_path = os.path.join(os.getcwd(), json_file)
    with open(destination_path, "w", encoding="utf-8") as outfile:
        json.dump(json_structure, outfile, indent=4)
    print(f"Saved {json_file} to {destination_path}")


# Usage example.  Guarded so that it runs only when this file is executed as
# a script: importing the module (as automation.py does before calling
# md_to_json itself) must not trigger a conversion as a side effect.
if __name__ == "__main__":
    md_file = "../Entrance/Call Number Index.md"
    json_file = "structure.json"

    print(f"Read from {md_file}")
    md_to_json(md_file, json_file)
    print(f"Output json file is {json_file}")

## create_base_template.py
import re
import yaml
from pathlib import Path


def extract_description_from_md(file_path):
    """Extract the ``description`` field from a Markdown file's front matter.

    The front matter is the text between a ``---`` line at the very start of
    the file and the next ``---``; it is parsed with ``yaml.safe_load``.

    Args:
        file_path: Object with a ``pathlib.Path``-style ``open`` method.

    Returns:
        * the value stored under ``description`` when the front matter is a
          mapping containing that key;
        * ``'No description provided'`` when the front matter parses but has
          no ``description`` key, or is not a mapping at all (empty, a list,
          a plain scalar);
        * the string ``'None'`` when there is no front matter or it is not
          valid YAML.
    """
    with file_path.open('r', encoding='utf-8') as md_file:
        content = md_file.read()

    metadata = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
    if not metadata:
        return 'None'

    try:
        data = yaml.safe_load(metadata.group(1))
    except yaml.YAMLError as e:
        print(f"YAML Error: {e}")
        return 'None'

    # safe_load returns None for empty input and may return a list or scalar;
    # only a mapping can carry a description.
    if not isinstance(data, dict):
        return 'No description provided'
    return data.get('description', 'No description provided')

def create_base_template(library_path, output_file, template_file_name='base_template.md'):
    """Collect the descriptions of call-number notes into one template file.

    Every ``*.md`` file below ``library_path`` whose name starts with three
    digits is passed to ``extract_description_from_md``; each truthy result
    is written to ``output_file`` as a ``## <relative posix path>`` heading
    followed by a ``description:`` line.  Parent folders of ``output_file``
    are created when missing.

    Args:
        library_path: ``pathlib.Path`` of the directory tree to scan.
        output_file: ``pathlib.Path`` of the file to (over)write.
        template_file_name: Only echoed in the progress messages.
    """
    print(f'***************Start Create base template*************** \nlibrary_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')

    name_filter = re.compile(r'^\d{3}.*\.md$')
    descriptions = {}

    for md_path in library_path.rglob('*.md'):
        if not name_filter.match(md_path.name):
            continue
        found = extract_description_from_md(md_path)
        if not found:
            continue
        # Keys use forward slashes regardless of platform.
        descriptions[md_path.relative_to(library_path).as_posix()] = found

    output_file.parent.mkdir(parents=True, exist_ok=True)

    with output_file.open('w', encoding='utf-8') as out_file:
        out_file.writelines(
            f'## {posix_path}\n description: {desc}\n\n'
            for posix_path, desc in descriptions.items()
        )

    print(f'***************Finished!! Create base template *************** \nbase template : library_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')
	import os
	import json
	import shutil
	import re

	import convert_md_to_json


	# Load the JSON file containing the directory structure
	def load_json_structure(file_path):
	    """Load and return the parsed JSON document stored at ``file_path``.

	    The file is decoded explicitly as UTF-8 rather than with the
	    platform's locale encoding.

	    Args:
	        file_path: Path of the JSON file to read.

	    Returns:
	        The value produced by ``json.load`` for the file's content.
	    """
	    with open(file_path, "r", encoding="utf-8") as file:
	        return json.load(file)


	# Create directories based on the JSON structure
	def create_directories(base_path, structure):
	    """Create the category folder tree described by ``structure``.

	    Every key of ``structure["MajorCategories"]`` becomes a folder under
	    ``base_path``; each key of that entry's ``"MinorCategories"`` becomes
	    a folder inside it, and each key of the minor's ``"Subcategories"``
	    a folder one level deeper.  Existing folders are left as is.
	    """
	    for major_key, major_val in structure["MajorCategories"].items():
	        major_dir = os.path.join(base_path, major_key)
	        os.makedirs(major_dir, exist_ok=True)
	        for minor_key, minor_val in major_val.get("MinorCategories", {}).items():
	            minor_dir = os.path.join(major_dir, minor_key)
	            os.makedirs(minor_dir, exist_ok=True)
	            for sub_key in minor_val.get("Subcategories", {}):
	                sub_dir = os.path.join(minor_dir, sub_key)
	                os.makedirs(sub_dir, exist_ok=True)


	def move_files_from_Entrance(Entrance_path, base_path, structure):
	    """Move each Markdown file found directly in ``Entrance_path`` to its category folder.

	    The destination of every ``*.md`` file is looked up with
	    ``determine_new_path``; files without a destination stay in place.

	    Args:
	        Entrance_path: Directory whose top-level ``*.md`` files are processed.
	        base_path: Root directory passed on to ``determine_new_path``.
	        structure: Category structure passed on to ``determine_new_path``.
	    """
	    # Check if Entrance directory has any markdown files
	    md_files = [f for f in os.listdir(Entrance_path) if f.endswith(".md")]
	    if not md_files:
	        print("No books to work on.")
	        return

	    files_moved = 0  # Counter for the number of files moved

	    # Move each markdown file to its new location
	    for file in md_files:
	        new_path = determine_new_path(file, structure, base_path)
	        if not new_path:
	            # No category matched; do not announce a move that will not happen.
	            continue

	        source_path = os.path.join(Entrance_path, file)
	        print(f"Trying to move: {source_path} to {new_path}")
	        shutil.move(source_path, new_path)
	        print(f"Moved {file} to {new_path}")
	        files_moved += 1

	    if files_moved == 0:
	        print("No books moved.")


	def determine_new_path(file_name, structure, base_path):
	    """Return the destination path for ``file_name``, or ``None`` if nothing matches.

	    The code used for matching is the file name without its trailing
	    ``.md``, cut at the first space.  The structure is searched major
	    category first, then that major's minor categories, then each minor's
	    subcategories; the first key equal to the code wins.

	    Args:
	        file_name: Name of the Markdown file (not a full path).
	        structure: Mapping with a ``"MajorCategories"`` key.
	        base_path: Root directory of the category folders.

	    Returns:
	        The joined destination path including ``file_name``, or ``None``.
	    """
	    # Strip only the extension; str.replace would also remove any ".md"
	    # that happens to occur inside the name.
	    stem = file_name[: -len(".md")] if file_name.endswith(".md") else file_name
	    parts = stem.split(" ")
	    subcategory_code = parts[0]
	    book_suffix = parts[1] if len(parts) > 1 else None

	    print(f"Processing file: {file_name}")
	    print(f"Subcategory code: {subcategory_code}, Book suffix: {book_suffix}")

	    # Iterate through the JSON structure to find the matching path
	    for major_key, major_val in structure["MajorCategories"].items():
	        # Check if file matches a major category
	        if subcategory_code == major_key:
	            path = os.path.join(base_path, major_key, file_name)
	            print(f"Matched major category. Path: {path}")
	            return path

	        for minor_key, minor_val in major_val.get("MinorCategories", {}).items():
	            # Check if file matches a minor category
	            if subcategory_code == minor_key:
	                path = os.path.join(base_path, major_key, minor_key, file_name)
	                print(f"Matched minor category. Path: {path}")
	                return path

	            for sub_key in minor_val.get("Subcategories", {}):
	                # Book files ("<sub> <letter>.md") land here too, because
	                # the code is the text before the first space.
	                if sub_key == subcategory_code:
	                    sub_dir = os.path.join(base_path, major_key, minor_key, sub_key)
	                    path = os.path.join(sub_dir, file_name)
	                    print(f"Matched subcategory/book. Path: {path}")
	                    return path

	    print(f"No matching path found for: {file_name}")
	    return None


	# Function to add tags to Markdown files
	def add_tags_to_md_files(base_path, json_structure):
	    """Insert a category tag line after the ``---`` header of Markdown files.

	    Walks ``base_path`` and, in every directory whose path contains
	    ``"Entrance"``, writes the tag built by ``construct_tag`` on its own
	    line directly after the second ``---`` marker of each ``*.md`` file.
	    A file is left unchanged when no tag can be built, when it does not
	    contain two ``---`` markers, or when that tag line is already there.

	    Args:
	        base_path: Directory tree to walk.
	        json_structure: Category structure passed on to ``construct_tag``.
	    """
	    print('start add_tags_to_md files')
	    for root, _dirs, files in os.walk(base_path):
	        if "Entrance" not in root:  # Only directories under an Entrance path are processed
	            continue
	        for file in files:
	            if not file.endswith(".md"):
	                continue
	            file_path = os.path.join(root, file)
	            try:
	                with open(file_path, "r+", encoding="utf-8") as f:
	                    content = f.read()

	                    new_tag = construct_tag(file, json_structure)
	                    print(f'new_tag: {new_tag}')

	                    if not new_tag:
	                        print(f"No tag could be constructed for {file}, skipping.")
	                        continue

	                    parts = content.split("---", 2)
	                    if len(parts) != 3:
	                        # Fewer than two "---" markers: no complete header to tag.
	                        print(f"No header found in {file}, skipping.")
	                        continue

	                    header, middle, body = parts
	                    if body.startswith(f"\n{new_tag}\n"):
	                        print(f"Tag already present in {file}, skipping.")
	                        continue

	                    new_content = f"{header}---{middle}---\n{new_tag}\n{body}"
	                    f.seek(0)  # Overwrite from the start of the file
	                    f.write(new_content)
	                    f.truncate()  # Remove the rest of the old content
	                    print(f"Added tag to {file}")

	            except UnicodeDecodeError as e:
	                print(f"Error reading {file}: {e}")



	# Build the tag string for a Markdown file from its call-number file name
	def construct_tag(file_name, json_structure):
	    """Build the tag string for a call-number Markdown file name.

	    Recognised names are ``X00.md`` (major), ``XYZ.md`` with a non-zero
	    middle digit (minor), ``XYZ.NN.md`` (subcategory) and ``XYZ.NN L.md``
	    (book, ``L`` a single letter).  For each level the name implies, a
	    piece ``#[[<code>]]#<title>`` is appended, the title being looked up
	    in ``json_structure`` with spaces replaced by underscores.

	    Args:
	        file_name: Name of the Markdown file (not a full path).
	        json_structure: Mapping with a ``"MajorCategories"`` key.

	    Returns:
	        The concatenated tag, or ``""`` when the name matches no pattern.
	    """
	    print("start cont+++++++++++++")
	    print(f'file_name:{file_name}')

	    major_regex = re.compile(r'^([0-9]{1}00)\.md$')
	    minor_regex = re.compile(r'^([0-9]{1}[1-9][0-9])\.md$')
	    subcategory_regex = re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\.md$')
	    book_regex = re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\s([a-z])\.md$', re.IGNORECASE)

	    major_code, minor_code, sub_code, book_code = "", "", "", ""

	    # Run each pattern once and keep the match objects.
	    major_match = major_regex.match(file_name)
	    minor_match = minor_regex.match(file_name)
	    subcategory_match = subcategory_regex.match(file_name)
	    book_match = book_regex.match(file_name)

	    if major_match:
	        major_code = major_match.group(1)
	    elif minor_match:
	        major_code = minor_match.group(1)[:1] + '00'
	        minor_code = minor_match.group(1)
	    elif subcategory_match:
	        major_code = subcategory_match.group(1)[:1] + '00'
	        minor_code = subcategory_match.group(1)
	        sub_code = subcategory_match.group(2)
	    elif book_match:
	        major_code = book_match.group(1)[:1] + '00'
	        minor_code = book_match.group(1)
	        sub_code = book_match.group(2)
	        book_code = book_match.group(3)

	    tag = ""

	    # A deeper code is only ever set together with its parents, so the
	    # *_info lookups below always have their parent mapping available.
	    if major_code:
	        major_info = json_structure.get("MajorCategories", {}).get(major_code, {})
	        tag += f"#[[{major_code}]]#{major_info.get('title', '').replace(' ', '_')}"
	    if minor_code:
	        minor_info = major_info.get("MinorCategories", {}).get(minor_code, {})
	        tag += f"#[[{minor_code}]]#{minor_info.get('title', '').replace(' ', '_')}"
	    if sub_code:
	        sub_info = minor_info.get("Subcategories", {}).get(f"{minor_code}.{sub_code}", {})
	        tag += f"#[[{minor_code}.{sub_code}]]#{sub_info.get('title', '').replace(' ', '_')}"
	    if book_code:
	        book_info = sub_info.get("Books", {}).get(f"{minor_code}.{sub_code} {book_code}", "")
	        tag += f"#[[{minor_code}.{sub_code} {book_code}]]#{book_info.replace(' ', '_')}"

	    print(major_code, minor_code , sub_code, book_code)

	    return tag






	# Ensure that we are in the correct directory to prevent affecting other directories
	current_dir = os.getcwd()
	expected_dir_name = "Library"

	# Guard: refuse to run unless "Library" appears somewhere in the working
	# directory path, because the steps below create folders and move files
	# relative to the working directory's parent.
	if expected_dir_name not in current_dir:
	    print(
	        f"Error: Current directory {current_dir} is not '{expected_dir_name}'. Exiting script."
	    )
	    # Leave with a non-zero status so callers can tell the run was refused.
	    raise SystemExit(1)


	# Main execution
	base_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
	json_file_name = "structure.json"


	# Step 1: regenerate structure.json from the Markdown index.
	md_file = "../Entrance/Call Number Index.md"
	json_file = "structure.json"

	print(f"Read from {md_file}")
	convert_md_to_json.md_to_json(md_file, json_file)
	print(f"Output json file is {json_file}")

	# Step 2: load the freshly written structure.
	json_structure = load_json_structure(json_file_name)
	Entrance_directory = os.path.join(base_directory, "Entrance")


	print(f"json_file_name: {json_file_name}")
	print(f"base_directory: {base_directory}")
	print(f"Entrance_directory: {Entrance_directory}")
	print("--------------------")

	# Step 3: create the folders, tag the files waiting in Entrance, then move them.
	create_directories(base_directory, json_structure)
	add_tags_to_md_files(Entrance_directory, json_structure)
	move_files_from_Entrance(
	    Entrance_directory, base_directory, json_structure
	)
	import re
	import yaml
	from pathlib import Path


	def extract_description_from_md(file_path):
	    """Extract the ``description`` field from a Markdown file's front matter.

	    The front matter is the text between a ``---`` line at the very start
	    of the file and the next ``---``; it is parsed with ``yaml.safe_load``.

	    Args:
	        file_path: Object with a ``pathlib.Path``-style ``open`` method.

	    Returns:
	        The ``description`` value; ``'No description provided'`` when the
	        front matter has no such key or is not a mapping; the string
	        ``'None'`` when there is no front matter or it is not valid YAML.
	    """
	    with file_path.open('r', encoding='utf-8') as md_file:
	        content = md_file.read()

	    metadata = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
	    if not metadata:
	        return 'None'

	    try:
	        data = yaml.safe_load(metadata.group(1))
	    except yaml.YAMLError as e:
	        print(f"YAML Error: {e}")
	        return 'None'

	    # safe_load returns None for empty input and may return a list or
	    # scalar; only a mapping can carry a description.
	    if not isinstance(data, dict):
	        return 'No description provided'
	    return data.get('description', 'No description provided')

	def create_base_template(library_path, output_file, template_file_name='base_template.md'):
	    """Collect the descriptions of call-number notes into one template file.

	    Every ``*.md`` file below ``library_path`` whose name starts with
	    three digits is passed to ``extract_description_from_md``; each truthy
	    result is written to ``output_file`` as a ``## <relative posix path>``
	    heading followed by a ``description:`` line.

	    Args:
	        library_path: ``pathlib.Path`` of the directory tree to scan.
	        output_file: ``pathlib.Path`` of the file to (over)write.
	        template_file_name: Only echoed in the progress messages.
	    """
	    print(f'*************Start Create base template************* \nlibrary_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')

	    descriptions = {}
	    pattern = re.compile(r'^\d{3}.*\.md$')

	    for path in library_path.rglob('*.md'):
	        if pattern.match(path.name):
	            description = extract_description_from_md(path)
	            if description:
	                relative_path = path.relative_to(library_path)
	                # Ensure descriptions use forward slashes in paths
	                descriptions[relative_path.as_posix()] = description

	    output_file.parent.mkdir(parents=True, exist_ok=True)

	    with output_file.open('w', encoding='utf-8') as out_file:
	        for posix_path, desc in descriptions.items():
	            out_file.write(f'## {posix_path}\n description: {desc}\n\n')

	    print(f'*************Finished!! Create base template ************* \nbase template : library_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')