Created
March 19, 2024 03:56
-
-
Save murphybread/e99137ae5f1706bdc6a2ed4efdaace7d to your computer and use it in GitHub Desktop.
manage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
import shutil | |
import os | |
def md_to_json(md_file, json_file):
    """Convert a Markdown call-number index into a nested JSON file.

    Every line of ``md_file`` is stripped and matched against four bullet
    forms that wrap a code in a ``[[...]]`` wiki link:

    * ``- [[d0d]] title``      -> major category (three digits, middle one 0)
    * ``- [[dNd]] title``      -> minor category (middle digit 1-9)
    * ``- [[ddd.dd]] title``   -> subcategory
    * ``- [[ddd.dd x]] title`` -> book (``x`` is one ASCII letter)

    Each entry is attached to the most recently opened entry of the level
    above it. Blank lines and lines starting with ``---`` are skipped; lines
    matching none of the forms are echoed and otherwise ignored. An entry
    whose parent level is not open is reported on stdout and dropped, and so
    are its descendants.

    Args:
        md_file: Path of the Markdown index to read (decoded as UTF-8).
        json_file: Output path. A relative path is resolved against the
            current working directory.

    Returns:
        None. The structure is written to ``json_file`` and progress is
        printed to stdout.
    """
    # Compile once instead of rebuilding the patterns for every line.
    major_re = re.compile(r"- \[\[(\d0\d)\]\]\s*(.*)")
    minor_re = re.compile(r"- \[\[(\d[1-9]\d)\]\]\s*(.*)")
    sub_re = re.compile(r"- \[\[(\d{3}\.\d{2})\]\]\s*(.*)")
    book_re = re.compile(r"- \[\[(\d{3}\.\d{2} [a-zA-Z])\]\]\s*(.*)")

    # Explicit encoding so the result does not depend on the platform locale.
    with open(md_file, "r", encoding="utf-8") as file:
        lines = file.readlines()

    json_structure = {"MajorCategories": {}}
    majors = json_structure["MajorCategories"]
    # Containers of the currently open major / minor / subcategory.
    # None means "no open parent at that level", so children are rejected
    # instead of being looked up under a stale key.
    minors = subs = books = None

    for i, line in enumerate(lines):
        line = line.strip()  # Remove leading and trailing whitespaces
        if not line or line.startswith("---"):
            continue  # Skip empty lines and metadata lines
        print(f"Processing line {i}: {line}")  # Debug print

        # The four patterns are mutually exclusive: each requires "]]"
        # directly after its own code shape.
        major_match = major_re.match(line)
        minor_match = minor_re.match(line)
        sub_match = sub_re.match(line)
        book_match = book_re.match(line)

        if major_match:
            current_major, title = major_match.groups()
            minors = {}
            subs = books = None  # a new major closes the previous branch
            majors[current_major] = {
                "value": current_major,
                "title": title,
                "MinorCategories": minors,
            }
            print(f"Major Category: {current_major}, Title: {title}")  # Debug print
        elif minor_match:
            current_minor, title = minor_match.groups()
            books = None  # a new minor closes the previous subcategory
            if minors is None:
                subs = None
                print(f"Error processing line {i}: '{line}'")
                print(f"Minor category {current_minor} has no major category above it")
                continue
            subs = {}
            minors[current_minor] = {"title": title, "Subcategories": subs}
            print(f"Minor Category: {current_minor}, Title: {title}")  # Debug print
        elif sub_match:
            current_sub, title = sub_match.groups()
            if subs is None:
                books = None
                print(f"Error processing line {i}: '{line}'")
                print(f"Subcategory {current_sub} has no minor category above it")
                continue
            books = {}
            subs[current_sub] = {"title": title, "Books": books}
            print(f"Subcategory: {current_sub}, Title: {title}")  # Debug print
        elif book_match:
            book_code, book_title = book_match.groups()
            if books is None:
                print(f"Error processing line {i}: '{line}'")
                print(f"Book {book_code} has no subcategory above it")
                continue
            books[book_code] = book_title
            print(f"Book: {book_code}, Title: {book_title}")  # Debug print

    # Write straight to the destination. The original wrote to this same path
    # and then moved the file onto itself, which did nothing.
    destination_path = os.path.join(os.getcwd(), json_file)
    with open(destination_path, "w", encoding="utf-8") as outfile:
        json.dump(json_structure, outfile, indent=4)
    print(f"Saved {json_file} to {destination_path}")
# Usage example: convert the Markdown index into structure.json.
# Both paths are relative, so they resolve against the current working
# directory. These statements sit at module level and therefore also run
# when the file is imported.
md_file = "../Entrance/Call Number Index.md"
json_file = "structure.json"
print(f"Read from {md_file}")
md_to_json(md_file, json_file)
print(f"Output json file is {json_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"MajorCategories": { | |
"000": { | |
"value": "000", | |
"title": "IT Knowledge", | |
"MinorCategories": { | |
"010": { | |
"title": "Develop Knowledge", | |
"Subcategories": { | |
"010.00": { | |
"title": "Develop Computer Science Knowledge", | |
"Books": { | |
"010.00 a": "Essential Developer Insights", | |
"010.00 b": "Industry Domain Knowledge" | |
} | |
}, | |
"010.10": { | |
"title": "Develop Programming Language", | |
"Books": { | |
"010.10 a": "Bash shell" | |
} | |
} | |
} | |
}, | |
"020": { | |
"title": "Operation Knowledge", | |
"Subcategories": { | |
"020.00": { | |
"title": "Versioning", | |
"Books": { | |
"020.00 a": "Sementic Verisioning", | |
"020.00 b": "headver", | |
"020.00 c": "Versioning Strategy" | |
} | |
}, | |
"020.10": { | |
"title": "Git", | |
"Books": { | |
"020.10 a": "git init", | |
"020.10 b": "git commands", | |
"020.10 c": "git structure", | |
"020.10 d": "git submodule", | |
"020.10 e": "git gist" | |
} | |
} | |
} | |
}, | |
"030": { | |
"title": "Package", | |
"Subcategories": { | |
"030.00": { | |
"title": "PIP", | |
"Books": { | |
"030.00 a": "pip name and module name", | |
"030.00 b": "pip packages pre option in requirements" | |
} | |
} | |
} | |
}, | |
"080": { | |
"title": "English for IT Workers", | |
"Subcategories": { | |
"080.00": { | |
"title": "Developer English", | |
"Books": {} | |
} | |
} | |
}, | |
"090": { | |
"title": "External Insights an Trends", | |
"Subcategories": { | |
"090.00": { | |
"title": "Industry Trends", | |
"Books": {} | |
}, | |
"090.10": { | |
"title": "Live Session", | |
"Books": { | |
"090.10 a": "Job Definetion" | |
} | |
} | |
} | |
} | |
} | |
}, | |
"100": { | |
"value": "100", | |
"title": "Infra", | |
"MinorCategories": { | |
"110": { | |
"title": "DevOps Engineer Infra", | |
"Subcategories": {} | |
}, | |
"120": { | |
"title": "ML Engineer Infra", | |
"Subcategories": {} | |
} | |
} | |
}, | |
"200": { | |
"value": "200", | |
"title": "DevOps", | |
"MinorCategories": { | |
"210": { | |
"title": "CICD", | |
"Subcategories": { | |
"210.00": { | |
"title": "DevOps Fundemental", | |
"Books": { | |
"210.00 a": "DevOps Backgrond" | |
} | |
}, | |
"210.10": { | |
"title": "DevOps Culture", | |
"Books": {} | |
}, | |
"210.20": { | |
"title": "DevOps Solutions", | |
"Books": { | |
"210.20 a": "GitLab" | |
} | |
} | |
} | |
} | |
} | |
}, | |
"300": { | |
"value": "300", | |
"title": "Applications", | |
"MinorCategories": { | |
"310": { | |
"title": "Development Environment", | |
"Subcategories": { | |
"310.00": { | |
"title": "IDE", | |
"Books": { | |
"310.00 a": "Terminal", | |
"310.00 b": "Colab" | |
} | |
}, | |
"310.10": { | |
"title": "Conda", | |
"Books": { | |
"310.10 a": "Conda base", | |
"310.10 b": "Conda License" | |
} | |
} | |
} | |
}, | |
"320": { | |
"title": "Frontend", | |
"Subcategories": { | |
"320.00": { | |
"title": "HTML", | |
"Books": {} | |
}, | |
"320.10": { | |
"title": "CSS", | |
"Books": { | |
"320.10 b": "Border Properties" | |
} | |
}, | |
"320.30": { | |
"title": "Java Script", | |
"Books": {} | |
} | |
} | |
}, | |
"330": { | |
"title": "Backend", | |
"Subcategories": {} | |
} | |
} | |
}, | |
"400": { | |
"value": "400", | |
"title": "ML Engineer Basic", | |
"MinorCategories": { | |
"410": { | |
"title": "Mathematics", | |
"Subcategories": { | |
"410.00": { | |
"title": "Linear Algebra", | |
"Books": { | |
"410.00 a": "Fundamental Function", | |
"410.00 b": "Vector", | |
"410.00 c": "Vectors Properties", | |
"410.00 d": "Vector Operation" | |
} | |
}, | |
"410.10": { | |
"title": "Probability", | |
"Books": { | |
"410.10 a": "Fundamental Function" | |
} | |
}, | |
"410.20": { | |
"title": "Statistics", | |
"Books": { | |
"410.20 a": "Probability Distribution and Estimation", | |
"410.20 b": "Maximum Likelihood Estimation(MLE) and Deep Learning" | |
} | |
}, | |
"410.30": { | |
"title": "Calculus", | |
"Books": { | |
"410.30 a": "Fundamental" | |
} | |
} | |
} | |
}, | |
"420": { | |
"title": "Data", | |
"Subcategories": { | |
"420.00": { | |
"title": "Structured Data", | |
"Books": { | |
"420.00 a": "Numerical Data (Continuous and Discrete)", | |
"420.00 b": "Categorical Data (Ordinal and Nominal)" | |
} | |
}, | |
"420.10": { | |
"title": "Unstructured Data", | |
"Books": {} | |
}, | |
"420.20": { | |
"title": "Semi-structured Data", | |
"Books": { | |
"420.20 a": "JSON", | |
"420.20 b": "YAML" | |
} | |
} | |
} | |
}, | |
"430": { | |
"title": "ML Development Tools", | |
"Subcategories": { | |
"430.00": { | |
"title": "LangChain", | |
"Books": { | |
"430.00 a": "Langchain Components" | |
} | |
}, | |
"430.10": { | |
"title": "Streamlit", | |
"Books": { | |
"430.10 a": "LLM RAG Application Using Langchain and FAISS" | |
} | |
} | |
} | |
}, | |
"440": { | |
"title": "ML Methodology", | |
"Subcategories": { | |
"440.00": { | |
"title": "EDA(Exploratory Data Analysis)", | |
"Books": { | |
"440.00 a": "Initiating Exploratory Data Analysis" | |
} | |
} | |
} | |
} | |
} | |
}, | |
"500": { | |
"value": "500", | |
"title": "ML and DL Modeling", | |
"MinorCategories": { | |
"510": { | |
"title": "Natural Language Processing (NLP)", | |
"Subcategories": {} | |
}, | |
"520": { | |
"title": "Computer Vision", | |
"Subcategories": { | |
"520.00": { | |
"title": "Introduction to CV", | |
"Books": {} | |
}, | |
"520.30": { | |
"title": "Generative Models in CV", | |
"Books": { | |
"520.30 a": "GANs", | |
"520.30 b": "CLIP", | |
"520.30 c": "Stable Diffusion" | |
} | |
} | |
} | |
} | |
} | |
}, | |
"600": { | |
"value": "600", | |
"title": "ML Libraries and Implementation", | |
"MinorCategories": { | |
"610": { | |
"title": "Data Handling", | |
"Subcategories": { | |
"610.00": { | |
"title": "Pandas", | |
"Books": { | |
"610.00 a": "Pandas-basic" | |
} | |
}, | |
"610.10": { | |
"title": "NumPy", | |
"Books": { | |
"610.10 a": "Numpy Fundamental functions", | |
"610.10 b": "Numpy Appllied function" | |
} | |
} | |
} | |
}, | |
"620": { | |
"title": "Data visualization", | |
"Subcategories": { | |
"620.00": { | |
"title": "Matplotlib", | |
"Books": { | |
"620.00 a": "Matplotlib Fundamental" | |
} | |
}, | |
"620.10": { | |
"title": "Seabornn", | |
"Books": { | |
"620.10 a": "Seaborn Fundamental" | |
} | |
} | |
} | |
}, | |
"630": { | |
"title": "Machine Learning Frameworks", | |
"Subcategories": { | |
"630.00": { | |
"title": "scikit-learn", | |
"Books": { | |
"630.00 a": "scikit-learn functions" | |
} | |
}, | |
"630.10": { | |
"title": "PyTorch", | |
"Books": {} | |
}, | |
"630.20": { | |
"title": "TensorFlow", | |
"Books": {} | |
}, | |
"630.30": { | |
"title": "Langchain", | |
"Books": { | |
"630.30 a": "Amazon Bedrock with RAG", | |
"630.30 b": "RAG with FAISS" | |
} | |
} | |
} | |
}, | |
"640": { | |
"title": "Vector Database", | |
"Subcategories": { | |
"640.00": { | |
"title": "", | |
"Books": { | |
"640.00 a": "Milvus" | |
} | |
} | |
} | |
} | |
} | |
}, | |
"700": { | |
"value": "700", | |
"title": "Research Paper", | |
"MinorCategories": { | |
"710": { | |
"title": "methodologysh", | |
"Subcategories": { | |
"710.00": { | |
"title": "Multi-agent Reinforcement Learning", | |
"Books": { | |
"710.00 a": "Base Domains", | |
"710.00 b": "Paper Review" | |
} | |
} | |
} | |
} | |
} | |
}, | |
"800": { | |
"value": "800", | |
"title": "test_major", | |
"MinorCategories": { | |
"810": { | |
"title": "test_minor", | |
"Subcategories": { | |
"810.00": { | |
"title": "test_sub", | |
"Books": { | |
"810.00 a": "test_book" | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment