Skip to content

Instantly share code, notes, and snippets.

@tos-kamiya
Created November 19, 2023 16:14
Show Gist options
  • Save tos-kamiya/ee8499f35cd0b4e0d2f830720a194cbc to your computer and use it in GitHub Desktop.
Save tos-kamiya/ee8499f35cd0b4e0d2f830720a194cbc to your computer and use it in GitHub Desktop.
A tool for aggregating file extensions and languages in a directory, compatible with the tokei project (https://github.com/XAMPPRocky/tokei).
# languages.json comes from the tokei project; place it in the same directory as this script.
# Source: https://github.com/XAMPPRocky/tokei/blob/c8e4d0703252c87b1df45382b365c6bb00769dbe/languages.json
from typing import Dict, Counter as CounterType
from collections import Counter
import json
import os
import sys
def load_ext_to_lang_name() -> Dict[str, str]:
    """
    Load a dictionary mapping file extensions to language names.

    The mapping is read from ``languages.json``, which must be located in the
    same directory as this script. Every entry under the top-level
    ``"languages"`` key may provide a ``"name"`` (the entry's own key is used
    when it is absent) and an ``"extensions"`` list; entries without
    extensions are skipped.

    :return: Dictionary with dot-prefixed file extensions (e.g. ``".py"``) as
        keys and language names as values. When several languages list the
        same extension, the one appearing last in the file wins.
    :raises OSError: If ``languages.json`` cannot be opened.
    :raises KeyError: If the file has no top-level ``"languages"`` key.
    """
    # Retrieve the configuration file
    script_dir = os.path.dirname(os.path.abspath(__file__))
    languages_json_path = os.path.join(script_dir, "languages.json")
    # JSON text is UTF-8; do not rely on the platform's locale encoding,
    # which is not UTF-8 everywhere (e.g. on Windows).
    with open(languages_json_path, "r", encoding="utf-8") as inp:
        lang_data = json.load(inp)

    # Parse the configuration file to build a dictionary from extension to language name
    ext_to_name: Dict[str, str] = {}
    for lang_id, info in lang_data["languages"].items():
        name = info.get("name", lang_id)
        # "extensions" may be missing, null or empty; treat all as "none".
        for ext in info.get("extensions") or ():
            if not ext.startswith("."):
                ext = "." + ext
            ext_to_name[ext] = name
    return ext_to_name
def count_file_extensions(directory: str) -> CounterType[str]:
    """
    Tally how many files carry each extension below a directory.

    The directory tree is walked recursively. The extension is whatever
    ``os.path.splitext`` reports for the file name, so files without an
    extension are counted under the empty string.

    :param directory: Path of the directory to search.
    :return: A Counter mapping each extension to its number of files.
    """
    tally = Counter()
    for _dirpath, _dirnames, filenames in os.walk(directory):
        # Feed one directory's worth of extensions into the tally at a time.
        tally.update(os.path.splitext(filename)[1] for filename in filenames)
    return tally
# Assigned explicitly because this text is not the first statement of the
# module, so it would not otherwise become the module docstring.
__doc__ = """
lang_ext_counter.py is a tool for aggregating file extensions and languages in a directory, compatible with the tokei project (https://github.com/XAMPPRocky/tokei).
It analyzes all files in a specified directory, counts the occurrences of each file extension,
and maps them to their corresponding programming languages using data from tokei's languages.json.
"""
def main() -> None:
    """
    Print a tab-separated table of the file extensions found in a directory.

    The directory is taken from the first command-line argument and defaults
    to the current directory. Each row holds the extension, the language name
    looked up in languages.json ("-" when the extension is unknown) and the
    number of files; rows are sorted by extension.

    :raises SystemExit: If the given path is not an existing directory.
    """
    directory = sys.argv[1] if len(sys.argv) > 1 else os.curdir
    # os.walk() silently yields nothing for a missing or non-directory path,
    # which would print a header-only table as if the scan had succeeded.
    if not os.path.isdir(directory):
        sys.exit(f"error: not a directory: {directory}")

    ext_to_lang_name = load_ext_to_lang_name()
    counts = count_file_extensions(directory)
    ext_name_count_list = [
        (ext, ext_to_lang_name.get(ext) or "-", count)
        for ext, count in counts.items()
    ]
    print("extension\ttype\tcount")
    for ext, name, count in sorted(ext_name_count_list):
        print(f"{ext}\t{name}\t{count}")


# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment