Last active
March 31, 2024 09:16
-
-
Save murphybread/a0a351e489cde4b2064fcd3a7855d885 to your computer and use it in GitHub Desktop.
Python files for managing Library (my own web platform)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import shutil | |
import re | |
import convert_md_to_json | |
# Load the JSON file containing the directory structure | |
def load_json_structure(file_path):
    """Read the category structure from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
# Create directories based on the JSON structure | |
def create_directories(base_path, structure):
    """Create the folder tree described by ``structure`` under ``base_path``.

    One folder is made per key of ``structure["MajorCategories"]``; inside
    it one per key of that entry's ``"MinorCategories"``; and inside that
    one per key of the minor entry's ``"Subcategories"``. Folders that
    already exist are left alone.

    Args:
        base_path: Directory under which the tree is created.
        structure: Mapping with a ``"MajorCategories"`` key, nested as
            described above.
    """

    def ensure(*segments):
        # Build the path below base_path and create it if it is missing.
        folder = os.path.join(base_path, *segments)
        os.makedirs(folder, exist_ok=True)

    for major_code, major_entry in structure["MajorCategories"].items():
        ensure(major_code)
        minors = major_entry.get("MinorCategories", {})
        for minor_code, minor_entry in minors.items():
            ensure(major_code, minor_code)
            for sub_code in minor_entry.get("Subcategories", {}):
                ensure(major_code, minor_code, sub_code)
def move_files_from_Entrance(Entrance_path, base_path, structure):
    """Move Markdown files out of the Entrance folder into the category tree.

    Every ``*.md`` file directly inside ``Entrance_path`` is passed to
    ``determine_new_path``; files for which a destination is returned are
    moved there, the others stay where they are.

    Args:
        Entrance_path: Folder that is scanned for ``.md`` files.
        base_path: Root of the category tree (forwarded to
            ``determine_new_path``).
        structure: Category structure (forwarded to ``determine_new_path``).
    """
    # Check if Entrance directory has any markdown files
    md_files = [f for f in os.listdir(Entrance_path) if f.endswith(".md")]
    if not md_files:
        print("No books to work on.")
        return

    files_moved = 0  # Counter for the number of files moved

    # Move each markdown file to its new location
    for file in md_files:
        new_path = determine_new_path(file, structure, base_path)
        if not new_path:
            # No matching category: leave the file in place.
            continue
        source_path = os.path.join(Entrance_path, file)
        # Announce the move once, and only when there is a real destination.
        print(f"Trying to move: {source_path} to {new_path}")
        # Make sure the target folder exists so the move cannot fail on a
        # missing directory.
        os.makedirs(os.path.dirname(new_path), exist_ok=True)
        shutil.move(source_path, new_path)
        print(f"Moved {file} to {new_path}")
        files_moved += 1

    if files_moved == 0:
        print("No books moved.")
def determine_new_path(file_name, structure, base_path):
    """Work out where a Markdown file belongs in the category tree.

    The code is the part of the file name before the first space, after a
    trailing ``.md`` has been removed. It is compared with the major keys,
    then the minor keys, then the subcategory keys of ``structure``. A book
    file such as ``"110.01 a.md"`` has the code ``"110.01"`` and therefore
    lands in the folder of that subcategory.

    Args:
        file_name: Bare file name, e.g. ``"110.01 a.md"``.
        structure: Mapping with a ``"MajorCategories"`` key.
        base_path: Root directory of the category tree.

    Returns:
        The destination path including ``file_name``, or ``None`` when no
        key matches the code.
    """
    # Strip only a trailing ".md"; a ".md" in the middle of the name is part
    # of the name and must not be removed.
    stem = file_name[:-3] if file_name.endswith(".md") else file_name
    parts = stem.split(" ")
    subcategory_code = parts[0]
    book_suffix = parts[1] if len(parts) > 1 else None

    print(f"Processing file: {file_name}")
    print(f"Subcategory code: {subcategory_code}, Book suffix: {book_suffix}")

    # Iterate through the JSON structure to find the matching path
    for major_key, major_val in structure["MajorCategories"].items():
        # Check if file matches a major category
        if subcategory_code == major_key:
            path = os.path.join(base_path, major_key, file_name)
            print(f"Matched major category. Path: {path}")
            return path

        for minor_key, minor_val in major_val.get("MinorCategories", {}).items():
            # Check if file matches a minor category
            if subcategory_code == minor_key:
                path = os.path.join(base_path, major_key, minor_key, file_name)
                print(f"Matched minor category. Path: {path}")
                return path

            for sub_key in minor_val.get("Subcategories", {}):
                # The code never contains a space, so comparing it with the
                # subcategory key covers both subcategory and book files.
                if sub_key == subcategory_code:
                    sub_dir = os.path.join(base_path, major_key, minor_key, sub_key)
                    path = os.path.join(sub_dir, file_name)
                    print(f"Matched subcategory/book. Path: {path}")
                    return path

    print(f"No matching path found for: {file_name}")
    return None
# Function to add tags to Markdown files | |
def add_tags_to_md_files(base_path, json_structure):
    """Insert a category tag line after the front matter of Markdown files.

    Walks ``base_path`` and, for every ``.md`` file in a directory whose path
    contains ``"Entrance"``, asks ``construct_tag`` for a tag. When a tag is
    returned and the file holds at least two ``---`` delimiters, the tag is
    written on its own line right after the second delimiter. A file that
    already carries that exact tag line in that position is left untouched,
    so running the function repeatedly does not stack duplicate tags.

    Args:
        base_path: Directory to walk.
        json_structure: Category structure forwarded to ``construct_tag``.
    """
    print('start add_tags_to_md files')
    for root, _dirs, files in os.walk(base_path):
        if "Entrance" not in root:  # Only directories under Entrance are processed
            continue
        for file in files:
            if not file.endswith(".md"):
                continue
            file_path = os.path.join(root, file)

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except UnicodeDecodeError as e:
                print(f"Error reading {file}: {e}")
                continue

            new_tag = construct_tag(file, json_structure)
            print(f'new_tag: {new_tag}')
            if not new_tag:
                # File name is not a recognised call number: nothing to add.
                continue

            # Front matter is the text between the first two "---" markers.
            parts = content.split("---", 2)
            if len(parts) != 3:
                print(f"No header found in {file}, skipping.")
                continue
            header, middle, body = parts

            tag_line = f"\n{new_tag}\n"
            if body.startswith(tag_line):
                # Tag was inserted by an earlier run; do not add it again.
                continue

            new_content = f"{header}---{middle}---{tag_line}{body}"
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(new_content)
            print(f"Added tag to {file}")
# Tag define | |
def construct_tag(file_name, json_structure):
    """Build the tag string for a call-number file name.

    Recognised names are ``N00.md`` (major), ``NMD.md`` with a non-zero
    middle digit (minor), ``NMD.SS.md`` (subcategory) and ``NMD.SS x.md``
    (book, single letter). For each level present, the tag gets a
    ``#[[code]]#Title`` piece, with the title looked up in
    ``json_structure`` and its spaces replaced by underscores; a missing
    title contributes an empty string.

    Args:
        file_name: Bare file name to classify.
        json_structure: Mapping with a ``"MajorCategories"`` key.

    Returns:
        The concatenated tag, or ``""`` when the name matches no pattern.
    """
    print("start cont+++++++++++++")
    print(f'file_name:{file_name}')

    major_regex = re.compile(r'^([0-9]{1}00)\.md$')
    minor_regex = re.compile(r'^([0-9]{1}[1-9][0-9])\.md$')
    subcategory_regex = re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\.md$')
    book_regex = re.compile(r'^([0-9]{1}[1-9][0-9])\.([0-9]{2})\s([a-z])\.md$', re.IGNORECASE)

    major_code = minor_code = sub_code = book_code = ""

    hit = major_regex.match(file_name)
    if hit:
        major_code = hit.group(1)
    else:
        # The deeper patterns all start with the minor code; pad the captured
        # groups so the unused levels stay empty.
        for pattern in (minor_regex, subcategory_regex, book_regex):
            hit = pattern.match(file_name)
            if hit:
                minor_code, sub_code, book_code = (hit.groups() + ("", ""))[:3]
                major_code = minor_code[:1] + '00'
                break

    pieces = []
    if major_code:
        major_info = json_structure.get("MajorCategories", {}).get(major_code, {})
        major_title = major_info.get('title', '').replace(' ', '_')
        pieces.append(f"#[[{major_code}]]#{major_title}")
        if minor_code:
            minor_info = major_info.get("MinorCategories", {}).get(minor_code, {})
            minor_title = minor_info.get('title', '').replace(' ', '_')
            pieces.append(f"#[[{minor_code}]]#{minor_title}")
            if sub_code:
                sub_key = f"{minor_code}.{sub_code}"
                sub_info = minor_info.get("Subcategories", {}).get(sub_key, {})
                sub_title = sub_info.get('title', '').replace(' ', '_')
                pieces.append(f"#[[{sub_key}]]#{sub_title}")
                if book_code:
                    book_key = f"{minor_code}.{sub_code} {book_code}"
                    book_info = sub_info.get("Books", {}).get(book_key, "")
                    book_title = book_info.replace(' ', '_')
                    pieces.append(f"#[[{book_key}]]#{book_title}")
    tag = "".join(pieces)

    print(major_code, minor_code, sub_code, book_code)
    return tag
# Ensure that we are in the correct directory to prevent affecting other directories.
# The check is a substring test on the working-directory path.
current_dir = os.getcwd()
expected_dir_name = "Library"
if expected_dir_name not in current_dir:
    print(
        f"Error: Current directory {current_dir} is not '{expected_dir_name}'. Exiting script."
    )
    # Abort with a non-zero status; SystemExit needs no `site` helper and
    # reports the failure to the calling shell.
    raise SystemExit(1)

# Main execution
# The category tree and the Entrance folder live in the parent of the
# working directory.
base_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
json_file_name = "structure.json"

# start convert md
# Regenerate structure.json from the Markdown index before it is loaded.
md_file = "../Entrance/Call Number Index.md"
json_file = "structure.json"
print(f"Read from {md_file}")
convert_md_to_json.md_to_json(md_file, json_file)
print(f"Output json file is {json_file}")
# end convert md

json_structure = load_json_structure(json_file_name)
Entrance_directory = os.path.join(base_directory, "Entrance")

print(f"json_file_name: {json_file_name}")
print(f"base_directory: {base_directory}")
print(f"Entrance_directory: {Entrance_directory}")
print("*--------------------*")

# Order matters: folders first, then tag the files while they are still in
# Entrance, then move them into the tree.
create_directories(base_directory, json_structure)
add_tags_to_md_files(Entrance_directory, json_structure)
move_files_from_Entrance(
    Entrance_directory, base_directory, json_structure
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
import shutil | |
import os | |
def md_to_json(md_file, json_file):
    """Convert a Markdown call-number index into a nested JSON structure.

    Each non-empty line that does not start with ``---`` is stripped and
    matched against four bullet patterns:

    * ``- [[N0N]] Title``       -> major category
    * ``- [[NMN]] Title``       -> minor category (middle digit 1-9)
    * ``- [[NNN.NN]] Title``    -> subcategory
    * ``- [[NNN.NN x]] Title``  -> book (single letter)

    Minor, subcategory and book entries are attached to the most recently
    seen parent. A line whose parent does not exist is reported and skipped.
    The result is written to ``json_file``, resolved against the current
    working directory.

    Args:
        md_file: Path of the Markdown index to read (decoded as UTF-8).
        json_file: Output file name or path.
    """
    # Compile once instead of once per line.
    major_re = re.compile(r"- \[\[(\d0\d)\]\]\s*(.*)")
    minor_re = re.compile(r"- \[\[(\d[1-9]\d)\]\]\s*(.*)")
    sub_re = re.compile(r"- \[\[(\d{3}\.\d{2})\]\]\s*(.*)")
    book_re = re.compile(r"- \[\[(\d{3}\.\d{2} [a-zA-Z])\]\]\s*(.*)")

    # Explicit UTF-8 so titles decode the same on every platform.
    with open(md_file, "r", encoding="utf-8") as file:
        lines = file.readlines()

    majors = {}
    json_structure = {"MajorCategories": majors}
    current_major = current_minor = current_sub = None

    for i, line in enumerate(lines):
        line = line.strip()  # Remove leading and trailing whitespaces
        if not line or line.startswith("---"):
            continue  # Skip empty lines and metadata lines

        print(f"Processing line {i}: {line}")  # Debug print

        try:
            major_match = major_re.match(line)
            minor_match = minor_re.match(line)
            sub_match = sub_re.match(line)
            book_match = book_re.match(line)

            if major_match:
                current_major, title = major_match.groups()
                majors[current_major] = {
                    "value": current_major,
                    "title": title,
                    "MinorCategories": {},
                }
                # A new major starts a fresh branch; forget the old children.
                current_minor = current_sub = None
                print(f"Major Category: {current_major}, Title: {title}")  # Debug print
            elif minor_match:
                code, title = minor_match.groups()
                # Attach first: if the parent is missing the pointer must not move.
                majors[current_major]["MinorCategories"][code] = {
                    "title": title,
                    "Subcategories": {},
                }
                current_minor = code
                current_sub = None
                print(f"Minor Category: {current_minor}, Title: {title}")  # Debug print
            elif sub_match:
                code, title = sub_match.groups()
                minor_entry = majors[current_major]["MinorCategories"][current_minor]
                minor_entry["Subcategories"][code] = {"title": title, "Books": {}}
                current_sub = code
                print(f"Subcategory: {current_sub}, Title: {title}")  # Debug print
            elif book_match:
                book_code, book_title = book_match.groups()
                minor_entry = majors[current_major]["MinorCategories"][current_minor]
                minor_entry["Subcategories"][current_sub]["Books"][book_code] = book_title
                print(f"Book: {book_code}, Title: {book_title}")  # Debug print
        except KeyError as e:
            # The entry refers to a parent that has not been declared; report
            # it and carry on with the remaining lines.
            print(f"Error processing line {i}: '{line}'")
            print(str(e))

    # Save JSON structure to file. The path is resolved against the working
    # directory; no separate move step is needed because source and
    # destination would be the same path.
    destination_path = os.path.join(os.getcwd(), json_file)
    with open(destination_path, "w", encoding="utf-8") as outfile:
        json.dump(json_structure, outfile, indent=4)

    # Message text kept as before for output compatibility.
    print(f"Moved {json_file} to {destination_path}")
# Usage example
# Guarded so that importing this module does not run a conversion as a side
# effect; the conversion runs only when the file is executed directly.
if __name__ == "__main__":
    md_file = "../Entrance/Call Number Index.md"
    json_file = "structure.json"
    print(f"Read from {md_file}")
    md_to_json(md_file, json_file)
    print(f"Output json file is {json_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import yaml | |
from pathlib import Path | |
def extract_description_from_md(file_path):
    """Extract the ``description`` field from a Markdown file's front matter.

    The front matter is the text between a leading ``---`` line and the next
    ``---``; it is parsed with ``yaml.safe_load``.

    Args:
        file_path: Object with a ``pathlib.Path``-style ``open`` method.

    Returns:
        The value of ``description`` when the front matter is a mapping that
        contains it; ``'No description provided'`` when the mapping lacks the
        key; the string ``'None'`` when there is no front matter, it is not
        valid YAML, or it does not parse to a mapping.
    """
    with file_path.open('r', encoding='utf-8') as md_file:
        content = md_file.read()

    metadata = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
    if not metadata:
        return 'None'

    try:
        data = yaml.safe_load(metadata.group(1))
    except yaml.YAMLError as e:
        print(f"YAML Error: {e}")
        return 'None'

    # Empty front matter parses to None and a bare scalar/list is not a
    # mapping either; neither has a "description" to read.
    if not isinstance(data, dict):
        return 'None'
    return data.get('description', 'No description provided')
def create_base_template(library_path, output_file, template_file_name='base_template.md'):
    """Collect descriptions of call-number files and write them to one file.

    Every ``*.md`` file below ``library_path`` whose name starts with three
    digits is passed to ``extract_description_from_md``. For each truthy
    result a section ``## <relative posix path>`` followed by the description
    is written to ``output_file``, ordered by path.

    Args:
        library_path: ``pathlib.Path`` of the directory to search recursively.
        output_file: ``pathlib.Path`` of the file to write; its parent
            directory is created when missing.
        template_file_name: Only echoed in the progress messages.
    """
    print(f'***************Start Create base template*************** \nlibrary_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')

    descriptions = {}
    pattern = re.compile(r'^\d{3}.*\.md$')

    # rglob yields in no particular order; sort so the generated file is the
    # same on every run and every machine.
    for path in sorted(library_path.rglob('*.md')):
        if not pattern.match(path.name):
            continue
        description = extract_description_from_md(path)
        if description:
            relative_path = path.relative_to(library_path)
            # Ensure descriptions use forward slashes in paths
            descriptions[relative_path.as_posix()] = description

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with output_file.open('w', encoding='utf-8') as out_file:
        for posix_path, desc in descriptions.items():
            out_file.write(f'## {posix_path}\n description: {desc}\n\n')

    print(f'***************Finished!! Create base template *************** \nbase template : library_path = {library_path} \noutput_file = {output_file} \nTEMPLATE_FILE_NAME = {template_file_name}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment