Skip to content

Instantly share code, notes, and snippets.

@paulresdat
Created March 29, 2024 23:07
Show Gist options
  • Save paulresdat/c1c7ab8662965f83fcbbfe39874878ad to your computer and use it in GitHub Desktop.
Save paulresdat/c1c7ab8662965f83fcbbfe39874878ad to your computer and use it in GitHub Desktop.
A simple line counter that makes it easy to add support for new languages
import argparse
from enum import Enum
from os.path import abspath
from glob import glob
from io import TextIOWrapper
import pathlib
from typing import List, Pattern, AnyStr
import inspect
import re
from abc import ABCMeta, abstractmethod
# Run `python3 line-count.py -h` to list the available options.
#
# Example use (the default language is C#):
#   python3 line-count.py -d ./VSIProject/VirtualSwitchIndication
#
# Exclude some assemblies to count only the VSI project and the NuGet packages
# specific to the VSI project (including VsiSignalR and VSICommon).
# Adding -i also counts lines that hold nothing but bracket characters,
# i.e. a single "{", "}", "(", or ")" on its own line.
#   python3 line-count.py -d ./VSIProject/VirtualSwitchIndication --lang c# --exclude AKRRCommon,AKRRItcm,AKRRAmqp -i
#
# To find other directories worth excluding before counting, first list the
# directories that contain matching files (--show-dir only prints them; nothing is counted):
#   python3 line-count.py -d ./VSIProject/LINNTestEngine --lang python --show-dir
# Then add the exclusions:
#   python3 line-count.py -d ./VSIProject/LINNTestEngine --lang python --exclude "site-packages,node_modules"
def main():
    """Count the lines of code under a directory for the selected language.

    Reads the command line through ``parse_arguments``, collects every file
    under ``--directory`` whose extension matches the language, drops the ones
    whose path contains an excluded fragment, and prints a per-extension total.
    With ``--show-dir`` nothing is counted; only the directories that contain
    matching files are printed.
    """
    args = parse_arguments()
    langs = get_langs([args.lang])
    directory = pathlib.Path(abspath(args.directory))
    include_all = args.include_all_lines

    # User exclusions are a comma separated list of path fragments. Empty
    # fragments (e.g. from a trailing comma) are dropped: an empty string is a
    # substring of every path and would exclude every file.
    exclude = []
    if args.exclude is not None:
        exclude.extend(
            part.strip() for part in str(args.exclude).split(",") if part.strip()
        )

    if not langs:
        # Some CodeTypes members have no Language subclass; say so instead of
        # printing an empty total.
        print("No line counter is implemented for " + str(args.lang))
        return

    all_counts = {}
    for lang in langs:
        print("\nProcessing " + str(lang.lang_type()))

        # Work on a copy so the user's exclusions are not appended to the list
        # owned by the language object.
        lang_exclusion = list(lang.excluded_directories())
        lang_exclusion.extend(exclude)

        # is_file() guards against directories whose name matches the glob,
        # which could not be opened for reading.
        file_match = [
            f for f in directory.rglob('*.' + lang.glob_file)
            if f.is_file() and matches_inclusion(f, lang_exclusion)
        ]

        if args.show_dir:
            # Debugging aid: list only the directories that hold matching
            # files and skip the counting for this language.
            match_dir = sorted({str(f.parent) for f in file_match})
            print("\n".join(match_dir))
            continue

        count = 0
        for fcount, f in enumerate(file_match, start=1):
            print("Files: " + str(fcount), end="\r")
            # The context manager closes each file as soon as it is counted;
            # errors="replace" keeps one undecodable byte from aborting the run.
            with open(f, "r", errors="replace") as source:
                count += lang.line_count(source, include_all)
        all_counts[lang.glob_file] = count

    if not args.show_dir:
        print("\n")
    print("Total Counts")
    for extension, total in all_counts.items():
        print("Lines in ." + extension + " files: " + str(total))
def matches_inclusion(file: str, exclude: List[str]) -> bool:
    """Tell whether ``file`` survives the exclusion list.

    Args:
        file: Path of the candidate file; it is converted with ``str()`` before
            being compared.
        exclude: Fragments that disqualify a path when they occur anywhere
            inside it (plain substring test).

    Returns:
        ``True`` when none of the fragments occurs in the path, else ``False``.
    """
    path_text = str(file)
    return not any(fragment in path_text for fragment in exclude)
def parse_arguments():
    """Parse the command line of the line counter.

    Returns:
        The ``argparse.Namespace`` with the attributes ``directory``, ``lang``
        (a ``CodeTypes`` member), ``exclude`` (raw comma separated string or
        ``None``), ``include_all_lines`` and ``show_dir``.
    """
    parser = argparse.ArgumentParser(description="Line Counter")
    # The directory is required: main() resolves it unconditionally, so a
    # missing value would otherwise surface as a TypeError traceback instead of
    # a usage message.
    parser.add_argument("--directory", "-d", action="store", required=True,
                        help="Directory to point to")
    # type=CodeTypes converts the raw string (e.g. "c#") into the enum member.
    parser.add_argument("--lang", "-l", action="store", type=CodeTypes, default=CodeTypes.csharp,
                        help="Parse the lines of a specific language: default c#")
    parser.add_argument("--exclude", "-e", action="store",
                        help="comma separated list of assembly names to exclude")
    parser.add_argument("--include-all-lines", "-i", action="store_true", default=False,
                        help="include all lines except empty newlines")
    parser.add_argument("--show-dir", "-sd", action="store_true", default=False,
                        help="Show the current list of directories that the files will be counted from")
    return parser.parse_args()
class CodeTypes(str, Enum):
    """Languages that can be named with the ``--lang`` option.

    The value of each member is the string typed on the command line; because
    the enum also derives from ``str``, members compare equal to that string.
    """

    csharp = "c#"
    javascript = "js"
    typescript = "ts"
    python = "python"
class ILanguage(metaclass=ABCMeta):
    """Interface implemented by every language-specific line counter.

    Besides regular inheritance, any class exposing a callable ``line_count``
    attribute is recognised as a virtual subclass of ``ILanguage`` itself.
    """

    @classmethod
    def __subclasshook__(cls, subclass: type) -> bool:
        """Treat classes with a callable ``line_count`` as ``ILanguage`` subclasses.

        Returns ``True`` for such classes and ``NotImplemented`` otherwise, so
        the normal ``issubclass`` machinery decides the remaining cases.
        """
        # The hook is inherited by every subclass. Without this guard any class
        # with a callable line_count would also pass issubclass()/isinstance()
        # checks against each concrete language class.
        if cls is not ILanguage:
            return NotImplemented
        if callable(getattr(subclass, 'line_count', None)):
            return True
        return NotImplemented

    @abstractmethod
    def line_count(self, file: TextIOWrapper, include_all: bool = False) -> int:
        """Return the number of counted lines read from ``file``."""
        raise NotImplementedError

    @abstractmethod
    def excluded_directories(self) -> List[str]:
        """Return the path fragments this language excludes by default."""
        raise NotImplementedError

    @abstractmethod
    def lang_type(self) -> 'CodeTypes':
        """Return the ``CodeTypes`` member this counter handles."""
        raise NotImplementedError

    @abstractmethod
    def glob_file(self) -> str:
        """Return the file extension (without the dot) to search for."""
        raise NotImplementedError
class Language(ILanguage):
    """Shared implementation of a line counter.

    Subclasses configure themselves in ``__init__`` by setting ``glob_file``
    (file extension without the dot), ``_type``, ``_excluded_dir`` and
    ``_lines_ignore`` (regular expressions for lines that should not be
    counted), and then call ``initialize()``.
    """

    def __init__(self):
        self.glob_file = None
        self._excluded_dir = []
        self._lines_ignore = []
        self._ignore_regexes = []
        self._type: CodeTypes = None

    @property
    def glob_file(self) -> str:
        """File extension (without the dot) this language searches for.

        Exposed as a property so that reading ``lang.glob_file`` yields the
        extension string; a plain method of the same name would be hidden by
        the instance attribute the subclasses assign.
        """
        return self._glob_file

    @glob_file.setter
    def glob_file(self, value: str) -> None:
        self._glob_file = value

    def initialize(self):
        """Validate the configuration and compile the ignore patterns.

        Raises:
            RuntimeError: If ``glob_file`` has not been set.
        """
        if self.glob_file is None:
            raise RuntimeError("Language does not have glob file variable set which is required to know what kind of files it should be searching")
        self._ignore_regexes: List[Pattern[str]] = [
            re.compile(pattern) for pattern in self._lines_ignore
        ]

    def __matches_exclusion(self, line: str):
        """Return True when ``line`` matches one of the ignore patterns."""
        # re.match anchors at the start of the line only; the patterns carry
        # their own end anchor where they need one.
        return any(regex.match(line) for regex in self._ignore_regexes)

    def line_count(self, file: TextIOWrapper, include_all: bool = False) -> int:
        """Count the non-blank lines of ``file``.

        Args:
            file: Open text file; it is read to the end but not closed here.
            include_all: When True every non-blank line is counted; otherwise
                lines matching an ignore pattern are skipped.

        Returns:
            The number of counted lines.
        """
        count = 0
        # Iterate lazily instead of loading the whole file, stripping each
        # line once.
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            if include_all or not self.__matches_exclusion(line):
                count += 1
        return count

    def excluded_directories(self) -> List[str]:
        """Return a copy of the path fragments excluded by default.

        A copy is returned so that callers may extend the result without
        altering this language's own configuration.
        """
        return list(self._excluded_dir)

    def lang_type(self) -> 'CodeTypes':
        """Return the ``CodeTypes`` member assigned by the subclass."""
        return self._type
def get_langs(filter: List[CodeTypes] = None) -> List[ILanguage]:
    """Instantiate the language counters defined in this module.

    Every module-level class that has ``Language`` in its MRO (``Language``
    itself excluded) is instantiated.

    Args:
        filter: Language types to keep; ``None`` keeps every language found.

    Returns:
        The instantiated counters, restricted to ``filter`` when it is given.
    """
    def is_concrete_language(candidate) -> bool:
        # The MRO is inspected directly rather than through issubclass().
        return (
            inspect.isclass(candidate)
            and candidate is not Language
            and Language in inspect.getmro(candidate)
        )

    instances: List[ILanguage] = [
        candidate() for candidate in globals().values() if is_concrete_language(candidate)
    ]
    if filter is None:
        return instances
    return [language for language in instances if language.lang_type() in filter]
class JavaScript(Language):
    """Line counter for JavaScript (``*.js``) files."""

    def __init__(self):
        super().__init__()
        self._type = CodeTypes.javascript
        self.glob_file = 'js'
        # Files whose path contains a /bin/ directory are skipped.
        self._excluded_dir = ['/bin/']
        # Ignore lines made up solely of braces/parentheses, optionally followed
        # by ";". Written as a raw string: "\)" and "\(" are invalid escape
        # sequences in a normal literal and trigger a warning on recent Pythons.
        self._lines_ignore = [r'^({|}|\)|\()*;?$']
        self.initialize()
class Csharp(Language):
    """Line counter for C# (``*.cs``) files."""

    def __init__(self):
        super().__init__()
        self._type = CodeTypes.csharp
        self.glob_file = 'cs'
        # Files whose path contains an /obj/ or /bin/ directory are skipped.
        self._excluded_dir = ['/obj/', '/bin/']
        # Ignore lines made up solely of braces/parentheses, optionally followed
        # by ";". Written as a raw string: "\)" and "\(" are invalid escape
        # sequences in a normal literal and trigger a warning on recent Pythons.
        self._lines_ignore = [r'^({|}|\)|\()*;?$']
        self.initialize()
class Python(Language):
    """Line counter for Python (``*.py``) files.

    No directory is excluded by default and no line pattern is ignored, so
    every non-blank line is counted.
    """

    def __init__(self):
        super().__init__()
        self.glob_file = "py"
        self._type = CodeTypes.python
        # Nothing is filtered for this language: both lists stay empty.
        self._lines_ignore = list()
        self._excluded_dir = list()
        self.initialize()
# Run the counter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment