Skip to content

Instantly share code, notes, and snippets.

@a-berg
Created March 18, 2023 13:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save a-berg/570c08f0402899e140ffecbf786f1c25 to your computer and use it in GitHub Desktop.
Save a-berg/570c08f0402899e140ffecbf786f1c25 to your computer and use it in GitHub Desktop.
directory crawler & dirtree formatter, generated by GPT-4
import argparse
from pathlib import Path
def crawl_directory(directory, level=None, alias=None):
    """Build a nested dict mirroring the file/folder structure under *directory*.

    Files map to ``None``; directories map to a dict of their own entries.
    A directory that lies deeper than *level* is not descended into and maps
    to ``None`` as well.

    Args:
        directory: Root directory to crawl, as a ``Path`` or a string path.
        level: Maximum depth to descend. ``1`` lists only the root's direct
            entries, ``0`` does not even list the root. ``None`` means no
            limit.
        alias: Key used for the root in the result. Defaults to the
            directory's own name.

    Returns:
        A single-key dict ``{alias: contents}`` whose entries are ordered by
        name at every depth.
    """
    # Accept plain strings as well as Path objects.
    directory = Path(directory)

    def _crawl_directory(dir_path, curr_level):
        if level is not None and curr_level > level:
            return None
        nested_dict = {}
        # iterdir() yields entries in arbitrary order; sort so that the
        # resulting dict (and anything rendered from it) is reproducible.
        for item in sorted(dir_path.iterdir(), key=lambda entry: entry.name):
            if item.is_file():
                nested_dict[item.name] = None
            elif item.is_dir():
                nested_dict[item.name] = _crawl_directory(item, curr_level + 1)
            # Entries that are neither files nor directories are left out.
        return nested_dict

    if alias is None:
        # Path(".").name is "", which would give an empty root key; fall back
        # to the resolved name, then to the raw path text.
        alias = directory.name or directory.resolve().name or str(directory)
    return {alias: _crawl_directory(directory, 1)}
def to_dirtree_format(nested_dict):
    r"""Render a nested directory dict as a LaTeX ``\dirtree{...}`` block.

    Each key becomes one ``.<depth> <name>.`` line; a value of ``None`` ends
    the branch, any other value is rendered recursively one level deeper.

    Args:
        nested_dict: Mapping of entry names to ``None`` or to a nested
            mapping of the same shape.

    Returns:
        The ``\dirtree`` source as a single string.
    """
    # Characters that are special to LaTeX. A "%" in a name would otherwise
    # comment out the rest of the line, including the entry's closing ".".
    # str.translate works in one pass, so the braces and backslashes in the
    # replacement text are never escaped a second time.
    escapes = str.maketrans(
        {
            "\\": r"\textbackslash{}",
            "_": r"\_",
            "%": r"\%",
            "&": r"\&",
            "#": r"\#",
            "$": r"\$",
            "{": r"\{",
            "}": r"\}",
            "~": r"\textasciitilde{}",
            "^": r"\textasciicircum{}",
        }
    )

    def _to_dirtree_lines(subtree, depth=1):
        lines = []
        for name, children in subtree.items():
            lines.append(f".{depth} {str(name).translate(escapes)}.\n")
            if children is not None:  # None marks a leaf; a dict is a directory
                lines.extend(_to_dirtree_lines(children, depth + 1))
        return lines

    return "\\dirtree{%\n" + "".join(_to_dirtree_lines(nested_dict)) + "}"
def main(argv=None):
    """Command-line entry point: crawl a directory and print its structure.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits with an argparse usage error when the given path is not an
    existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Recursively crawl a directory and create a nested dictionary representing the file and folder structure"
    )
    parser.add_argument("directory", type=Path, help="The directory to crawl")
    parser.add_argument(
        "--level", "-L", type=int, default=None, help="The maximum depth of recursion"
    )
    parser.add_argument(
        "--alias", "-a", type=str, default=None, help="An alias for the input directory"
    )
    parser.add_argument(
        "--to-dirtree",
        "-D",
        action="store_true",
        default=False,
        # The default output is the printed Python dict, not JSON.
        help="Format the output as a LaTeX dirtree instead of printing the Python dict.",
    )
    args = parser.parse_args(argv)

    # Fail with a usage message rather than a traceback from iterdir().
    if not args.directory.is_dir():
        parser.error(f"not a directory: {args.directory}")

    result = crawl_directory(args.directory, args.level, args.alias)
    if args.to_dirtree:
        result = to_dirtree_format(result)
    print(result)


if __name__ == "__main__":
    main()
You have to write a python script that:
1. Recursively crawls a directory, creating a nested dictionary representing the file and folder structure, with the first key being said directory.
2. The recursion could have a limit, like the standard `tree` bash command.
3. Using a recursive function is not a must; you could use a `while` loop if it helps performance and readability, or be creative with `dict` merging and splitting paths.
4. accepts a directory as an input
5. accepts an optional level argument to limit the recursion depth
6. accepts an optional alias for the input directory
7. uses pathlib for path related functions and argparse for argument parsing
Example: for a folder named "notes/" with the following structure:
notes/
├── bibliography.bibtex
├── clean_arch.md
├── dvc_cml.md
└── includes
    ├── tree_l1.tex
    └── tree_l2.tex
the expected output is:
{
    "notes": {
        "bibliography.bibtex": None,
        "clean_arch.md": None,
        "dvc_cml.md": None,
        "includes": {
            "tree_l1.tex": None,
            "tree_l2.tex": None,
        }
    }
}
Revise your work for potential bugs before submitting. Write tests in a separate script to check it too.
import unittest
from pathlib import Path
from directory_crawler import crawl_directory
class TestCrawlDirectory(unittest.TestCase):
    """Checks crawl_directory against a small directory tree built on disk."""

    def setUp(self):
        """Create ``mydir`` with three files and one sub-folder in a temp dir."""
        import tempfile  # local import: only this fixture needs it

        tmp = tempfile.TemporaryDirectory()
        # Remove the tree after each test, even when the test fails.
        self.addCleanup(tmp.cleanup)

        # The root must be called "mydir": its name is the expected top key.
        self.test_dir = Path(tmp.name) / "mydir"
        (self.test_dir / "folder1").mkdir(parents=True)
        for relative in ("fileA", "fileB", "fileC", "folder1/file1", "folder1/file2"):
            (self.test_dir / relative).touch()

    def test_no_limit(self):
        """Without a level limit the whole tree is returned."""
        expected_output = {
            "mydir": {
                "fileA": None,
                "fileB": None,
                "fileC": None,
                "folder1": {
                    "file1": None,
                    "file2": None,
                },
            }
        }
        self.assertEqual(crawl_directory(self.test_dir), expected_output)

    def test_limit_1(self):
        """With level=1 sub-folders are listed but not descended into."""
        expected_output = {
            "mydir": {
                "fileA": None,
                "fileB": None,
                "fileC": None,
                "folder1": None,
            }
        }
        self.assertEqual(crawl_directory(self.test_dir, level=1), expected_output)

    def test_alias(self):
        """The alias replaces the directory name as the top-level key."""
        expected_output = {
            "my_dir": {
                "fileA": None,
                "fileB": None,
                "fileC": None,
                "folder1": {
                    "file1": None,
                    "file2": None,
                },
            }
        }
        self.assertEqual(crawl_directory(self.test_dir, alias="my_dir"), expected_output)


if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment