Skip to content

Instantly share code, notes, and snippets.

@murphybread
Created March 19, 2024 03:56
Show Gist options
  • Save murphybread/e99137ae5f1706bdc6a2ed4efdaace7d to your computer and use it in GitHub Desktop.
Save murphybread/e99137ae5f1706bdc6a2ed4efdaace7d to your computer and use it in GitHub Desktop.
manage
import re
import json
import shutil
import os
# Bullet patterns for the four levels of the call-number index, tried in
# this order. Each one captures (code, title) from "- [[code]] title".
# Compiled once here instead of being re-resolved for every input line.
_ENTRY_PATTERNS = (
    ("major", re.compile(r"- \[\[(\d0\d)\]\]\s*(.*)")),
    ("minor", re.compile(r"- \[\[(\d[1-9]\d)\]\]\s*(.*)")),
    ("sub", re.compile(r"- \[\[(\d{3}\.\d{2})\]\]\s*(.*)")),
    ("book", re.compile(r"- \[\[(\d{3}\.\d{2} [a-zA-Z])\]\]\s*(.*)")),
)


def md_to_json(md_file, json_file):
    """Convert a Markdown call-number index into a nested JSON file.

    Every non-blank line that does not start with "---" is stripped and
    matched against the bullet patterns above. Matching lines build this
    hierarchy:

        MajorCategories -> MinorCategories -> Subcategories -> Books

    Each entry is attached to the most recently seen parent of the level
    above it. An entry whose parent cannot be found is reported on stdout
    and skipped; it never aborts the conversion. Lines that match no
    pattern are ignored.

    Args:
        md_file: Path of the Markdown index to read (decoded as UTF-8).
        json_file: Output path. It is joined onto the current working
            directory, so a relative name lands in the cwd while an
            absolute path is used unchanged.

    Returns:
        None. The result is written to disk and progress is printed.
    """
    # Explicit UTF-8 so non-ASCII titles do not depend on the platform's
    # default locale encoding.
    with open(md_file, "r", encoding="utf-8") as file:
        lines = file.readlines()

    json_structure = {"MajorCategories": {}}
    majors = json_structure["MajorCategories"]
    current_major = current_minor = current_sub = None

    for i, line in enumerate(lines):
        line = line.strip()  # Remove leading and trailing whitespaces
        if not line or line.startswith("---"):
            continue  # Skip empty lines and metadata lines
        print(f"Processing line {i}: {line}")  # Debug print

        # Find the first pattern that recognises this line.
        kind = match = None
        for kind, pattern in _ENTRY_PATTERNS:
            match = pattern.match(line)
            if match:
                break
        if not match:
            continue
        code, title = match.groups()

        # The current_* key is updated before the insertion is attempted,
        # so it advances even when the entry turns out to be orphaned.
        # A stale minor/sub key left over from a previous branch cannot
        # mis-attach anything: the freshly created parent dict is empty,
        # so the lookup raises KeyError and the entry is reported below.
        try:
            if kind == "major":
                current_major = code
                majors[current_major] = {
                    "value": current_major,
                    "title": title,
                    "MinorCategories": {},
                }
                print(f"Major Category: {current_major}, Title: {title}")  # Debug print
            elif kind == "minor":
                current_minor = code
                majors[current_major]["MinorCategories"][current_minor] = {
                    "title": title,
                    "Subcategories": {},
                }
                print(f"Minor Category: {current_minor}, Title: {title}")  # Debug print
            elif kind == "sub":
                current_sub = code
                minor_entry = majors[current_major]["MinorCategories"][current_minor]
                minor_entry["Subcategories"][current_sub] = {
                    "title": title,
                    "Books": {},
                }
                print(f"Subcategory: {current_sub}, Title: {title}")  # Debug print
            else:  # kind == "book"
                minor_entry = majors[current_major]["MinorCategories"][current_minor]
                minor_entry["Subcategories"][current_sub]["Books"][code] = title
                print(f"Book: {code}, Title: {title}")  # Debug print
        except KeyError as e:
            # Only a missing parent can fail here; report it and carry on.
            print(f"Error processing line {i}: '{line}'")
            print(str(e))

    # Save JSON structure to file in the current directory (Manage).
    # The file is written straight to its final location: the former
    # shutil.move() call used the same path for source and destination,
    # so it had nothing to move.
    destination_path = os.path.join(os.getcwd(), json_file)
    with open(destination_path, "w", encoding="utf-8") as outfile:
        json.dump(json_structure, outfile, indent=4)
    # Message text kept as-is so the script's console output is unchanged.
    print(f"Moved {json_file} to {destination_path}")
# Example run: read the call-number index note and emit structure.json
# into the current working directory.
json_file = "structure.json"
md_file = "../Entrance/Call Number Index.md"

print(f"Read from {md_file}")
md_to_json(md_file=md_file, json_file=json_file)
print(f"Output json file is {json_file}")
{
"MajorCategories": {
"000": {
"value": "000",
"title": "IT Knowledge",
"MinorCategories": {
"010": {
"title": "Develop Knowledge",
"Subcategories": {
"010.00": {
"title": "Develop Computer Science Knowledge",
"Books": {
"010.00 a": "Essential Developer Insights",
"010.00 b": "Industry Domain Knowledge"
}
},
"010.10": {
"title": "Develop Programming Language",
"Books": {
"010.10 a": "Bash shell"
}
}
}
},
"020": {
"title": "Operation Knowledge",
"Subcategories": {
"020.00": {
"title": "Versioning",
"Books": {
"020.00 a": "Sementic Verisioning",
"020.00 b": "headver",
"020.00 c": "Versioning Strategy"
}
},
"020.10": {
"title": "Git",
"Books": {
"020.10 a": "git init",
"020.10 b": "git commands",
"020.10 c": "git structure",
"020.10 d": "git submodule",
"020.10 e": "git gist"
}
}
}
},
"030": {
"title": "Package",
"Subcategories": {
"030.00": {
"title": "PIP",
"Books": {
"030.00 a": "pip name and module name",
"030.00 b": "pip packages pre option in requirements"
}
}
}
},
"080": {
"title": "English for IT Workers",
"Subcategories": {
"080.00": {
"title": "Developer English",
"Books": {}
}
}
},
"090": {
"title": "External Insights an Trends",
"Subcategories": {
"090.00": {
"title": "Industry Trends",
"Books": {}
},
"090.10": {
"title": "Live Session",
"Books": {
"090.10 a": "Job Definetion"
}
}
}
}
}
},
"100": {
"value": "100",
"title": "Infra",
"MinorCategories": {
"110": {
"title": "DevOps Engineer Infra",
"Subcategories": {}
},
"120": {
"title": "ML Engineer Infra",
"Subcategories": {}
}
}
},
"200": {
"value": "200",
"title": "DevOps",
"MinorCategories": {
"210": {
"title": "CICD",
"Subcategories": {
"210.00": {
"title": "DevOps Fundemental",
"Books": {
"210.00 a": "DevOps Backgrond"
}
},
"210.10": {
"title": "DevOps Culture",
"Books": {}
},
"210.20": {
"title": "DevOps Solutions",
"Books": {
"210.20 a": "GitLab"
}
}
}
}
}
},
"300": {
"value": "300",
"title": "Applications",
"MinorCategories": {
"310": {
"title": "Development Environment",
"Subcategories": {
"310.00": {
"title": "IDE",
"Books": {
"310.00 a": "Terminal",
"310.00 b": "Colab"
}
},
"310.10": {
"title": "Conda",
"Books": {
"310.10 a": "Conda base",
"310.10 b": "Conda License"
}
}
}
},
"320": {
"title": "Frontend",
"Subcategories": {
"320.00": {
"title": "HTML",
"Books": {}
},
"320.10": {
"title": "CSS",
"Books": {
"320.10 b": "Border Properties"
}
},
"320.30": {
"title": "Java Script",
"Books": {}
}
}
},
"330": {
"title": "Backend",
"Subcategories": {}
}
}
},
"400": {
"value": "400",
"title": "ML Engineer Basic",
"MinorCategories": {
"410": {
"title": "Mathematics",
"Subcategories": {
"410.00": {
"title": "Linear Algebra",
"Books": {
"410.00 a": "Fundamental Function",
"410.00 b": "Vector",
"410.00 c": "Vectors Properties",
"410.00 d": "Vector Operation"
}
},
"410.10": {
"title": "Probability",
"Books": {
"410.10 a": "Fundamental Function"
}
},
"410.20": {
"title": "Statistics",
"Books": {
"410.20 a": "Probability Distribution and Estimation",
"410.20 b": "Maximum Likelihood Estimation(MLE) and Deep Learning"
}
},
"410.30": {
"title": "Calculus",
"Books": {
"410.30 a": "Fundamental"
}
}
}
},
"420": {
"title": "Data",
"Subcategories": {
"420.00": {
"title": "Structured Data",
"Books": {
"420.00 a": "Numerical Data (Continuous and Discrete)",
"420.00 b": "Categorical Data (Ordinal and Nominal)"
}
},
"420.10": {
"title": "Unstructured Data",
"Books": {}
},
"420.20": {
"title": "Semi-structured Data",
"Books": {
"420.20 a": "JSON",
"420.20 b": "YAML"
}
}
}
},
"430": {
"title": "ML Development Tools",
"Subcategories": {
"430.00": {
"title": "LangChain",
"Books": {
"430.00 a": "Langchain Components"
}
},
"430.10": {
"title": "Streamlit",
"Books": {
"430.10 a": "LLM RAG Application Using Langchain and FAISS"
}
}
}
},
"440": {
"title": "ML Methodology",
"Subcategories": {
"440.00": {
"title": "EDA(Exploratory Data Analysis)",
"Books": {
"440.00 a": "Initiating Exploratory Data Analysis"
}
}
}
}
}
},
"500": {
"value": "500",
"title": "ML and DL Modeling",
"MinorCategories": {
"510": {
"title": "Natural Language Processing (NLP)",
"Subcategories": {}
},
"520": {
"title": "Computer Vision",
"Subcategories": {
"520.00": {
"title": "Introduction to CV",
"Books": {}
},
"520.30": {
"title": "Generative Models in CV",
"Books": {
"520.30 a": "GANs",
"520.30 b": "CLIP",
"520.30 c": "Stable Diffusion"
}
}
}
}
}
},
"600": {
"value": "600",
"title": "ML Libraries and Implementation",
"MinorCategories": {
"610": {
"title": "Data Handling",
"Subcategories": {
"610.00": {
"title": "Pandas",
"Books": {
"610.00 a": "Pandas-basic"
}
},
"610.10": {
"title": "NumPy",
"Books": {
"610.10 a": "Numpy Fundamental functions",
"610.10 b": "Numpy Appllied function"
}
}
}
},
"620": {
"title": "Data visualization",
"Subcategories": {
"620.00": {
"title": "Matplotlib",
"Books": {
"620.00 a": "Matplotlib Fundamental"
}
},
"620.10": {
"title": "Seabornn",
"Books": {
"620.10 a": "Seaborn Fundamental"
}
}
}
},
"630": {
"title": "Machine Learning Frameworks",
"Subcategories": {
"630.00": {
"title": "scikit-learn",
"Books": {
"630.00 a": "scikit-learn functions"
}
},
"630.10": {
"title": "PyTorch",
"Books": {}
},
"630.20": {
"title": "TensorFlow",
"Books": {}
},
"630.30": {
"title": "Langchain",
"Books": {
"630.30 a": "Amazon Bedrock with RAG",
"630.30 b": "RAG with FAISS"
}
}
}
},
"640": {
"title": "Vector Database",
"Subcategories": {
"640.00": {
"title": "",
"Books": {
"640.00 a": "Milvus"
}
}
}
}
}
},
"700": {
"value": "700",
"title": "Research Paper",
"MinorCategories": {
"710": {
"title": "methodologysh",
"Subcategories": {
"710.00": {
"title": "Multi-agent Reinforcement Learning",
"Books": {
"710.00 a": "Base Domains",
"710.00 b": "Paper Review"
}
}
}
}
}
},
"800": {
"value": "800",
"title": "test_major",
"MinorCategories": {
"810": {
"title": "test_minor",
"Subcategories": {
"810.00": {
"title": "test_sub",
"Books": {
"810.00 a": "test_book"
}
}
}
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment