Skip to content

Instantly share code, notes, and snippets.

@murphybread
Last active March 31, 2024 09:16
Show Gist options
  • Save murphybread/a0a351e489cde4b2064fcd3a7855d885 to your computer and use it in GitHub Desktop.
Save murphybread/a0a351e489cde4b2064fcd3a7855d885 to your computer and use it in GitHub Desktop.
Python files for managing Library (my own web platform)
import os
import json
import shutil
import re
import convert_md_to_json
# Load the JSON file containing the directory structure
def load_json_structure(file_path):
    """Load the directory-structure description from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
# Create directories based on the JSON structure
def create_directories(base_path, structure):
    """Create the category folder tree described by ``structure``.

    One folder is created per major category directly under ``base_path``,
    one per minor category inside its major folder, and one per subcategory
    inside its minor folder. Folders that already exist are left alone.

    Args:
        base_path: Directory under which the major-category folders go.
        structure: Mapping with a ``"MajorCategories"`` key; each major entry
            may hold ``"MinorCategories"``, and each minor entry may hold
            ``"Subcategories"``.
    """
    for major_name, major_entry in structure["MajorCategories"].items():
        major_path = os.path.join(base_path, major_name)
        os.makedirs(major_path, exist_ok=True)

        minors = major_entry.get("MinorCategories", {})
        for minor_name in minors:
            minor_path = os.path.join(major_path, minor_name)
            os.makedirs(minor_path, exist_ok=True)

            for sub_name in minors[minor_name].get("Subcategories", {}):
                os.makedirs(os.path.join(minor_path, sub_name), exist_ok=True)
def move_files_from_Entrance(Entrance_path, base_path, structure):
    """Move the Markdown files found in the Entrance folder to their category folder.

    Only entries directly inside ``Entrance_path`` whose name ends with
    ``.md`` are considered. The destination of each file is decided by
    ``determine_new_path``; files for which it returns nothing stay where
    they are.

    Args:
        Entrance_path: Directory holding the not-yet-filed Markdown files.
        base_path: Root directory of the category folder tree.
        structure: Category structure passed through to ``determine_new_path``.
    """
    md_files = [f for f in os.listdir(Entrance_path) if f.endswith(".md")]
    if not md_files:
        print("No books to work on.")
        return

    files_moved = 0  # Counter for the number of files moved
    for file in md_files:
        new_path = determine_new_path(file, structure, base_path)
        if not new_path:
            # Nothing to move; do not log a misleading "Trying to move ... to None".
            continue

        source_path = os.path.join(Entrance_path, file)
        print(f"Trying to move: {source_path} to {new_path}")

        # Make sure the destination folder exists so the move cannot fail
        # just because the folder tree has not been created yet.
        destination_dir = os.path.dirname(new_path)
        if destination_dir:
            os.makedirs(destination_dir, exist_ok=True)

        shutil.move(source_path, new_path)
        print(f"Moved {file} to {new_path}")
        files_moved += 1

    if files_moved == 0:
        print("No books moved.")
def determine_new_path(file_name, structure, base_path):
    """Work out where a Markdown file belongs in the category folder tree.

    The text before the first space of the file name (with a trailing
    ``.md`` removed) is treated as the call-number code and compared with
    the major, minor and subcategory keys of ``structure``, in that order
    within each major category.

    Args:
        file_name: Name of the Markdown file, e.g. ``"110.10 a.md"``.
        structure: Mapping with a ``"MajorCategories"`` key.
        base_path: Root directory of the category folder tree.

    Returns:
        The destination path (folder of the matching category joined with
        ``file_name``), or ``None`` when no category key matches.
    """
    # Strip only a trailing ".md". Removing every ".md" occurrence would
    # turn a name such as "1.md00.md" into the valid-looking code "100".
    stem = file_name[: -len(".md")] if file_name.endswith(".md") else file_name
    parts = stem.split(" ")
    subcategory_code = parts[0]
    book_suffix = parts[1] if len(parts) > 1 else None  # only used for logging

    print(f"Processing file: {file_name}")
    print(f"Subcategory code: {subcategory_code}, Book suffix: {book_suffix}")

    for major_key, major_val in structure["MajorCategories"].items():
        # Check if file matches a major category
        if subcategory_code == major_key:
            path = os.path.join(base_path, major_key, file_name)
            print(f"Matched major category. Path: {path}")
            return path

        for minor_key, minor_val in major_val.get("MinorCategories", {}).items():
            # Check if file matches a minor category
            if subcategory_code == minor_key:
                path = os.path.join(base_path, major_key, minor_key, file_name)
                print(f"Matched minor category. Path: {path}")
                return path

            # A book file ("<subcategory> <letter>.md") carries its
            # subcategory key as the first token, so one comparison covers
            # both subcategory notes and books.
            for sub_key in minor_val.get("Subcategories", {}):
                if sub_key == subcategory_code:
                    sub_dir = os.path.join(base_path, major_key, minor_key, sub_key)
                    path = os.path.join(sub_dir, file_name)
                    print(f"Matched subcategory/book. Path: {path}")
                    return path

    print(f"No matching path found for: {file_name}")
    return None
# Function to add tags to Markdown files
def add_tags_to_md_files(base_path, json_structure):
    """Insert a category tag line after the front matter of Markdown files.

    Walks ``base_path`` and, for every ``.md`` file in a directory whose path
    contains ``"Entrance"``, asks ``construct_tag`` for the file's tag. The
    file content is split on the first two ``---`` markers and the tag is
    written on its own line right after the second marker. Files that cannot
    be decoded as UTF-8 are reported and skipped.

    Args:
        base_path: Directory tree to walk.
        json_structure: Category structure passed through to ``construct_tag``.
    """
    print('start add_tags_to_md files')
    for root, _dirs, files in os.walk(base_path):
        if "Entrance" not in root:  # Only files under an Entrance directory are tagged
            continue
        for file in files:
            if not file.endswith(".md"):
                continue
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r+", encoding="utf-8") as f:
                    content = f.read()
                    new_tag = construct_tag(file, json_structure)
                    print(f'new_tag: {new_tag}')

                    if not new_tag:
                        print(f"No tag could be built for {file}, skipping.")
                        continue

                    parts = content.split("---", 2)
                    if len(parts) != 3:
                        # Fewer than two "---" markers: nothing to anchor the tag to.
                        print(f"No header found in {file}, skipping.")
                        continue

                    header, middle, body = parts
                    # A file left in Entrance by an earlier run already has the
                    # tag right after its header; do not stack a duplicate.
                    if body.startswith(f"\n{new_tag}\n"):
                        print(f"Tag already present in {file}, skipping.")
                        continue

                    f.seek(0)  # Go back to the start of the file
                    f.write(f"{header}---{middle}---\n{new_tag}\n{body}")
                    f.truncate()  # Remove the rest of the old content
                    print(f"Added tag to {file}")
            except UnicodeDecodeError as e:
                print(f"Error reading {file}: {e}")
# Build the tag string for a Markdown file from its call-number file name
def construct_tag(file_name, json_structure):
    """Build the hierarchical tag string for a call-number file name.

    The file name is tested against four patterns: major (``X00.md``),
    minor (``XYZ.md`` with a non-zero middle digit), subcategory
    (``XYZ.NN.md``) and book (``XYZ.NN <letter>.md``). For every level the
    name implies, a ``#[[code]]#Title`` segment is appended, where the title
    is looked up in ``json_structure`` and its spaces become underscores.
    A code that is missing from the structure yields an empty title.

    Args:
        file_name: Bare file name such as ``"110.10 a.md"``.
        json_structure: Mapping with a ``"MajorCategories"`` key.

    Returns:
        The concatenated tag string, or ``""`` when the name matches none of
        the patterns.
    """
    print("start cont+++++++++++++")
    print(f'file_name:{file_name}')

    # Tried in order; the first pattern that matches decides the level.
    recognisers = (
        ("major", re.compile(r'^([0-9]{1}00)\.md$')),
        ("minor", re.compile(r'^([0-9]{1}[1-9][0-9])\.md$')),
        ("sub", re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\.md$')),
        ("book", re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\s([a-z])\.md$', re.IGNORECASE)),
    )

    major_code = minor_code = sub_code = book_code = ""
    for level, recogniser in recognisers:
        found = recogniser.match(file_name)
        if found is None:
            continue
        captured = found.groups()
        if level == "major":
            major_code = captured[0]
        else:
            minor_code = captured[0]
            # The major code is the first digit of the minor code followed by "00".
            major_code = minor_code[:1] + '00'
            if len(captured) > 1:
                sub_code = captured[1]
            if len(captured) > 2:
                book_code = captured[2]
        break

    segments = []
    if major_code:
        major_info = json_structure.get("MajorCategories", {}).get(major_code, {})
        major_title = major_info.get('title', '').replace(' ', '_')
        segments.append(f"#[[{major_code}]]#{major_title}")
        if minor_code:
            minor_info = major_info.get("MinorCategories", {}).get(minor_code, {})
            minor_title = minor_info.get('title', '').replace(' ', '_')
            segments.append(f"#[[{minor_code}]]#{minor_title}")
            if sub_code:
                sub_key = f"{minor_code}.{sub_code}"
                sub_info = minor_info.get("Subcategories", {}).get(sub_key, {})
                sub_title = sub_info.get('title', '').replace(' ', '_')
                segments.append(f"#[[{sub_key}]]#{sub_title}")
                if book_code:
                    book_key = f"{sub_key} {book_code}"
                    book_title = sub_info.get("Books", {}).get(book_key, "")
                    segments.append(f"#[[{book_key}]]#{book_title.replace(' ', '_')}")

    print(major_code, minor_code , sub_code, book_code)
    return "".join(segments)
# Ensure that we are in the correct directory to prevent affecting other directories
current_dir = os.getcwd()
expected_dir_name = "Library"
if expected_dir_name not in current_dir:
    print(
        f"Error: Current directory {current_dir} is not '{expected_dir_name}'. Exiting script."
    )
    # Abort with a non-zero status so callers can see the failure.
    # SystemExit is a builtin; exit() is only a helper added by the site
    # module and is not guaranteed to exist.
    raise SystemExit(1)

# Main execution
# The category folders live in the parent of the current working directory.
base_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
json_file_name = "structure.json"

# start convert md
# Regenerate structure.json from the Markdown index before using it.
# json_file and json_file_name name the same file.
md_file = "../Entrance/Call Number Index.md"
json_file = "structure.json"
print(f"Read from {md_file}")
convert_md_to_json.md_to_json(md_file, json_file)
print(f"Output json file is {json_file}")
# end convert md

json_structure = load_json_structure(json_file_name)
Entrance_directory = os.path.join(base_directory, "Entrance")
print(f"json_file_name: {json_file_name}")
print(f"base_directory: {base_directory}")
print(f"Entrance_directory: {Entrance_directory}")
print("*--------------------*")

# Order matters: tags are added while the files are still in Entrance,
# because add_tags_to_md_files only touches files under that directory.
create_directories(base_directory, json_structure)
add_tags_to_md_files(Entrance_directory, json_structure)
move_files_from_Entrance(Entrance_directory, base_directory, json_structure)
import re
import json
import shutil
import os
def md_to_json(md_file, json_file):
    """Convert the Markdown call-number index into a JSON structure file.

    Each non-empty line that does not start with ``---`` is matched against
    list entries of the form ``- [[CODE]] Title``:

    * ``X0Y`` (middle digit zero) starts a major category,
    * ``XYZ`` with a non-zero middle digit adds a minor category to the
      current major category,
    * ``XYZ.NN`` adds a subcategory to the current minor category,
    * ``XYZ.NN <letter>`` adds a book to the current subcategory.

    Lines that match none of these are ignored. An entry whose parent has
    not been seen yet is reported and skipped.

    Args:
        md_file: Path of the Markdown index to read (decoded as UTF-8).
        json_file: Output file name; it is joined onto the current working
            directory, so an absolute path is used as given.
    """
    with open(md_file, "r", encoding="utf-8") as file:
        lines = file.readlines()

    # Compiled once instead of once per line.
    major_re = re.compile(r"- \[\[(\d0\d)\]\]\s*(.*)")
    minor_re = re.compile(r"- \[\[(\d[1-9]\d)\]\]\s*(.*)")
    sub_re = re.compile(r"- \[\[(\d{3}\.\d{2})\]\]\s*(.*)")
    book_re = re.compile(r"- \[\[(\d{3}\.\d{2} [a-zA-Z])\]\]\s*(.*)")

    json_structure = {"MajorCategories": {}}
    majors = json_structure["MajorCategories"]
    current_major = current_minor = current_sub = None

    for i, line in enumerate(lines):
        line = line.strip()  # Remove leading and trailing whitespaces
        if not line or line.startswith("---"):
            continue  # Skip empty lines and metadata lines
        print(f"Processing line {i}: {line}")  # Debug print

        # Match major, minor, subcategories, and book entries
        major_match = major_re.match(line)
        minor_match = minor_re.match(line)
        sub_match = sub_re.match(line)
        book_match = book_re.match(line)

        try:
            if major_match:
                current_major, title = major_match.groups()
                majors[current_major] = {
                    "value": current_major,
                    "title": title,
                    "MinorCategories": {},
                }
                print(f"Major Category: {current_major}, Title: {title}")  # Debug print
            elif minor_match:
                current_minor, title = minor_match.groups()
                majors[current_major]["MinorCategories"][current_minor] = {
                    "title": title,
                    "Subcategories": {},
                }
                print(f"Minor Category: {current_minor}, Title: {title}")  # Debug print
            elif sub_match:
                current_sub, title = sub_match.groups()
                minors = majors[current_major]["MinorCategories"]
                minors[current_minor]["Subcategories"][current_sub] = {
                    "title": title,
                    "Books": {},
                }
                print(f"Subcategory: {current_sub}, Title: {title}")  # Debug print
            elif book_match:
                book_code, book_title = book_match.groups()
                minors = majors[current_major]["MinorCategories"]
                minors[current_minor]["Subcategories"][current_sub]["Books"][
                    book_code
                ] = book_title
                print(f"Book: {book_code}, Title: {book_title}")  # Debug print
        except KeyError as e:
            # Raised when an entry appears before its parent category.
            print(f"Error processing line {i}: '{line}'")
            print(str(e))

    # Write straight to the destination in the current working directory.
    # A separate move step would have the same source and destination path.
    destination_path = os.path.join(os.getcwd(), json_file)
    with open(destination_path, "w", encoding="utf-8") as outfile:
        json.dump(json_structure, outfile, indent=4)
    print(f"Saved {json_file} to {destination_path}")
# Usage example
md_file = "../Entrance/Call Number Index.md"
json_file = "structure.json"

# Run the conversion only when this file is executed directly. Without the
# guard, merely importing the module would rewrite structure.json as a side
# effect of the import.
if __name__ == "__main__":
    print(f"Read from {md_file}")
    md_to_json(md_file, json_file)
    print(f"Output json file is {json_file}")
import re
import yaml
from pathlib import Path
def extract_description_from_md(file_path):
    """Extract the ``description`` field from a Markdown file's front matter.

    The front matter is the text between a ``---`` line at the very start of
    the file and the next ``---``; it is parsed with ``yaml.safe_load``.

    Args:
        file_path: Path object of the Markdown file (read as UTF-8).

    Returns:
        The value of the ``description`` key when present;
        ``'No description provided'`` when the front matter has no such key
        or is not a mapping; the string ``'None'`` when the file has no
        front matter or the YAML cannot be parsed.
    """
    with file_path.open('r', encoding='utf-8') as md_file:
        content = md_file.read()

    # No MULTILINE flag: "^" anchors to the start of the file only.
    metadata = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
    if not metadata:
        return 'None'

    try:
        data = yaml.safe_load(metadata.group(1))
    except yaml.YAMLError as e:
        print(f"YAML Error: {e}")
        return 'None'

    # Empty front matter parses to None, and a bare scalar or list is not a
    # mapping either; neither has a .get(), so treat them as "no description".
    if not isinstance(data, dict):
        return 'No description provided'
    return data.get('description', 'No description provided')
def create_base_template(library_path, output_file, template_file_name='base_template.md'):
    """Collect file descriptions from the library tree into one template file.

    Every ``.md`` file under ``library_path`` whose name starts with three
    digits is passed to ``extract_description_from_md``. For each one a
    ``## <relative posix path>`` heading followed by its description is
    written to ``output_file``.

    Args:
        library_path: Path object of the directory tree to scan recursively.
        output_file: Path object of the file to write; missing parent
            directories are created.
        template_file_name: Only echoed in the start/finish log messages.
    """
    print(f'***************Start Create base template*************** \nlibrary_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')

    pattern = re.compile(r'^\d{3}.*\.md$')
    descriptions = {}
    # rglob yields entries in filesystem order; sorting keeps the generated
    # template identical from run to run.
    for path in sorted(library_path.rglob('*.md')):
        if not pattern.match(path.name):
            continue
        description = extract_description_from_md(path)
        if description:
            # Ensure descriptions use forward slashes in paths
            descriptions[path.relative_to(library_path).as_posix()] = description

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with output_file.open('w', encoding='utf-8') as out_file:
        for posix_path, desc in descriptions.items():
            out_file.write(f'## {posix_path}\n description: {desc}\n\n')

    print(f'***************Finished!! Create base template *************** \nbase template : library_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment