Created
March 14, 2023 09:01
-
-
Save databyjp/45e6ff851cd52ebb540fd866f631ba29 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "7ea59359-8129-4f04-97a7-8c17d86a6352", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"from pathlib import Path" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "cc52d372-6d11-43f4-b202-d8397283d799", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pathlib import Path\n", | |
"\n", | |
"# Set the directory to search for *.md and *.mdx files\n", | |
"search_dir = Path('../weaviate-io/developers/')\n", | |
"\n", | |
"# Use glob() method to find *.md and *.mdx files\n", | |
"md_paths = list()\n", | |
"for file_path in search_dir.glob('**/*.md*'):\n", | |
" md_paths.append(file_path)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "89d7b4b7-3484-4f42-ab86-cce0c8cf4e4f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import re\n", | |
"anchor_paths = set()\n", | |
"for i, md_path in enumerate(md_paths):\n", | |
" with open(md_paths[i], 'r') as f:\n", | |
" md_txt = f.read()\n", | |
"\n", | |
" # Define the regex pattern\n", | |
" # pattern = r'.mdx?\\#([a-z]*)\\-?'\n", | |
" pattern = r\"/[a-z\\-]+\\.md?#[a-z0-9\\-]+?(?=\\))\"\n", | |
" \n", | |
" # Use re.findall() to search for the pattern in the text\n", | |
" matches = re.findall(pattern, md_txt)\n", | |
"\n", | |
" # Print the matches\n", | |
" anchor_paths.update(matches)\n", | |
"\n", | |
"anchor_paths = sorted(anchor_paths)\n", | |
"# for p in anchor_paths:\n", | |
"# print(p)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "a63223bf-466b-41ff-83f8-bc4447c16e76", | |
"metadata": {}, | |
"source": [ | |
"## Check whether each linked file contains the heading its anchor points to" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "2148f6ae-7647-43f2-b561-d33691cafc39", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"anchor_tuples = list()\n", | |
"for p in anchor_paths:\n", | |
" anchor_tuples.append(p[1:].split(\"#\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "876b61c4-21ac-4b24-bf02-e065bdbeabf3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Testing RegEx patterns\n", | |
"\n", | |
"# tmp_txts = [\n", | |
"# \"### Module Capabilities: additional.go\",\n", | |
"# \"### Read & Write requests while a backup is running\",\n", | |
"# \"### S3 (AWS or S3-compatible)\"\n", | |
"# ]\n", | |
"# for tmp_txt in tmp_txts:\n", | |
"# print(re.sub(r'[^\\w\\s\\#\\-]', '', tmp_txt).lower())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "ca008ffa-6842-4258-874e-4218af179b7a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"# read write requests while a backup is running ['backups.md', 'read--write-requests-while-a-backup-is-running']\n", | |
"# parameters 1 ['objects.md', 'parameters-1']\n" | |
] | |
} | |
], | |
"source": [ | |
"for p in anchor_tuples:\n", | |
" matched = False\n", | |
" for md_path in md_paths:\n", | |
" if md_path.name == p[0]:\n", | |
" with open(md_path.absolute(), \"r\") as f:\n", | |
" tmp_txt = f.read()\n", | |
" search_str = \"# \" + p[1].replace(\"-\", \" \")\n", | |
" tmp_txt = re.sub(r'[^\\w\\s\\#\\-]', '', tmp_txt).replace(\"-\", \" \")\n", | |
" if search_str in tmp_txt.lower():\n", | |
" matched = True\n", | |
" break\n", | |
" if not matched:\n", | |
" print(search_str, p)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "45e4084a-6f8a-4209-9984-cdec726fcfcb", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment