Last active
July 15, 2020 06:36
-
-
Save laalaguer/c001f6cbb82395399b23a15898046ca6 to your computer and use it in GitHub Desktop.
count the code, comments, empty lines in the project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 3 Script | |
# Count the "real" code lines (exclude empty lines, and comments) in a project. | |
# Languages supported: Python, JavaScript, C, C++, Java, C#, etc.... | |
# Analyse: | |
# A source code file contains code, and comments. | |
# Single line comments are easy. Just // or # | |
# Multi-line comments usually begin with /* and end with */; tracking them
# could be accomplished with a stack, maybe. /* // */ counts as a single block of comments.
# In Python they begin with ''' or """, and end with three quotes too.
# Good news is: the total lines = code + comments + empty lines. | |
# So if we can count the empty lines AND comments, | |
# Then we can calculate the code lines. | |
import os | |
from typing import List, Tuple | |
# Extensions whose comments use the C-style // and /* ... */ syntax.
C_FAMILY = 'js ts mjs c cpp java cs'.split()
# Extensions whose comments use # and triple-quoted blocks.
PYTHON_FAMILY = ['py']
# Every extension the counter knows how to analyse.
EXTENSIONS = [*C_FAMILY, *PYTHON_FAMILY]
def _lstrip(this_line: str) -> str: | |
''' Strip the leading whitespaces off the line ''' | |
return this_line.lstrip() | |
def _rstrip(this_line: str) -> str: | |
''' Strip the tailing whitespaces off the line ''' | |
return this_line.rstrip() | |
def _strip(this_line: str) -> str:
    ''' Return the line with whitespace removed from both of its ends. '''
    # Trim the tail first, then the head, using the two sibling helpers.
    tail_trimmed = _rstrip(this_line)
    return _lstrip(tail_trimmed)
def _get_ext(path_name: str): | |
path_name = _strip(path_name) | |
if not ('.' in path_name): | |
raise ValueError('path does not contain ., no extension?') | |
ext = path_name.split('.')[-1] | |
return ext | |
def _is_empty_line(ext: str, this_line: str) -> bool:
    '''
    Tell whether the line is blank, i.e. empty once whitespace is stripped.
    The ext argument is accepted but not consulted.
    '''
    return _strip(this_line) == ''
def _is_begin_multi_comments(ext: str, this_line: str) -> bool:
    '''
    Tell whether the stripped line opens a multi-line comment: a triple
    quote (single or double) for the Python family, slash-star for the
    C family. Raises ValueError for any other extension.
    '''
    stripped = _strip(this_line)
    if ext in PYTHON_FAMILY:
        return stripped.startswith(("'''", '"""'))
    if ext in C_FAMILY:
        return stripped.startswith('/*')
    raise ValueError('file with extension: ' + ext + ' not supported!')
def _is_end_multi_comments(ext: str, this_line: str) -> bool:
    '''
    Tell whether the stripped line closes a multi-line comment: a triple
    quote (single or double) for the Python family, star-slash for the
    C family. Raises ValueError for any other extension.
    '''
    stripped = _strip(this_line)
    if ext in PYTHON_FAMILY:
        return stripped.endswith(("'''", '"""'))
    if ext in C_FAMILY:
        return stripped.endswith('*/')
    raise ValueError('file with extension: ' + ext + ' not supported!')
def _is_single_comment_line(ext: str, this_line: str) -> bool:
    '''
    Judge if this line, on its own, is one complete comment.

    Python family: the stripped line starts with #, or it opens with a
    triple quote and closes with the same kind of triple quote.
    C family: the stripped line starts with //, or it opens with
    slash-star and closes with star-slash.

    The closing delimiter must not overlap the opening one. A line that
    holds only a bare triple quote (or the three characters slash, star,
    slash) both "starts with" and "ends with" the delimiter, but it merely
    opens a block; reporting it as a complete comment would keep the
    caller from ever entering the multi-line block.

    Raises ValueError for an extension in neither family.
    '''
    this_line = _strip(this_line)
    if ext in PYTHON_FAMILY:
        if this_line.startswith('#'):
            return True
        for quote in ("'''", '"""'):
            if this_line.startswith(quote):
                # Needs room for two distinct delimiters of the same kind.
                return len(this_line) >= 2 * len(quote) and this_line.endswith(quote)
        return False
    if ext in C_FAMILY:
        if this_line.startswith('//'):
            return True
        # len >= 4 rules out '/*/' where the '*' would be shared.
        return (
            len(this_line) >= 4
            and this_line.startswith('/*')
            and this_line.endswith('*/')
        )
    raise ValueError('file with extension: ' + ext + ' not supported!')
def _read_a_file(abs_path: str) -> List[str]: | |
''' Read a file, get its lines of string ''' | |
with open(abs_path, 'r') as f: | |
lines = [] | |
lines = f.readlines() | |
return lines | |
def detect_file(ext: str, abs_path: str) -> Tuple[int, int, int, int]:
    '''
    Read a file and count its total, code, comment and empty lines.

    Every line is classified with the module's line predicates. While
    inside a multi-line comment block every line (blank ones included)
    counts as a comment line. Code lines are not detected directly; they
    are what remains: total - empty - comments.

    Returns (total_lines, code_lines, comment_lines, empty_lines).

    Raises ValueError if abs_path is not a file, or if a line outside any
    comment block is judged by _is_end_multi_comments to close one. The
    message carries the path and the 1-based line number.
    '''
    if not os.path.isfile(abs_path):
        raise ValueError('path must point to a file...')

    lines = _read_a_file(abs_path)
    total_lines = len(lines)
    comment_lines = 0
    empty_lines = 0
    in_comments_block_flag = False

    for line_no, line in enumerate(lines, start=1):
        if in_comments_block_flag:
            # Everything up to and including the closing line is comment.
            comment_lines += 1
            if _is_end_multi_comments(ext, line):
                in_comments_block_flag = False
            continue

        if _is_empty_line(ext, line):
            empty_lines += 1
        elif _is_single_comment_line(ext, line):
            comment_lines += 1
        elif _is_begin_multi_comments(ext, line):
            in_comments_block_flag = True
            comment_lines += 1
        elif _is_end_multi_comments(ext, line):
            # str() is required: concatenating the int directly raised
            # TypeError instead of the intended ValueError.
            raise ValueError(
                'not in comments block but see a end of block sign. '
                + abs_path + ' :' + str(line_no)
            )

    code_lines = total_lines - empty_lines - comment_lines
    return total_lines, code_lines, comment_lines, empty_lines
def summarize(abs_dir_path: str, exclude_abs_folders: List, include_extensions: List, debug: bool = False) -> Tuple[int, int, int, int]:
    '''
    Recursively count the lines of every matching file under abs_dir_path.

    abs_dir_path: directory to walk.
    exclude_abs_folders: folder NAMES (not paths) to skip. The same list is
        passed down the recursion, so a matching name is skipped at every
        depth, not only directly under abs_dir_path.
    include_extensions: lower-case extensions (without dot) to analyse.
    debug: when True, print the counts of each analysed file and folder
        and the path of each excluded folder.

    Files without an extension, or with an extension that is not listed,
    are skipped.

    Returns (total_lines, code_lines, comment_lines, empty_lines).

    Raises ValueError if abs_dir_path is not a directory, or if an entry
    is neither a file nor a directory.
    '''
    if not os.path.isdir(abs_dir_path):
        raise ValueError('abs_dir_path should be a dir.')

    totals = [0, 0, 0, 0]  # total, code, comment, empty
    for item in os.listdir(abs_dir_path):
        item_abs_path = os.path.join(abs_dir_path, item)
        if os.path.isdir(item_abs_path):
            if item in exclude_abs_folders:
                if debug:
                    print(item_abs_path, 'ignored.')
                continue
            counts = summarize(item_abs_path, exclude_abs_folders, include_extensions, debug)
        elif os.path.isfile(item_abs_path):
            try:
                # Use the bare file name so dots in parent directories
                # cannot be taken for the extension separator.
                ext = _get_ext(item).lower()
            except ValueError:
                # No extension (e.g. Makefile, LICENSE): nothing to count,
                # and it must not abort the whole walk.
                continue
            if ext not in include_extensions:
                continue
            counts = detect_file(ext, item_abs_path)
        else:
            raise ValueError(item_abs_path + ' is not a file or dir.')

        if debug:
            print(*counts, item_abs_path)
        totals = [running + found for running, found in zip(totals, counts)]

    total_lines, code_lines, comment_lines, empty_lines = totals
    return total_lines, code_lines, comment_lines, empty_lines
def pretty_print(abs_dir_path: str, exclude_abs_folders: List, include_extensions: List, debug: bool = False):
    '''
    Summarize abs_dir_path with summarize() and print a small report:
    the four line counts followed by the code and comment percentages.

    The arguments are passed to summarize() unchanged. When no lines were
    counted at all (empty folder or no matching files) both percentages
    are reported as 0.00% instead of dividing by zero.
    '''
    _total, _code, _comment, _empty = summarize(abs_dir_path, exclude_abs_folders, include_extensions, debug)

    if _total:
        code_percentage = _code / _total * 100
        comment_percentage = _comment / _total * 100
    else:
        # Nothing counted: avoid ZeroDivisionError.
        code_percentage = comment_percentage = 0.0

    print('Summary:', abs_dir_path)
    print('-' * (len('Summary: ') + len(abs_dir_path)))
    print('Total\t', _total)
    print('Code\t', _code)
    print('Comment\t', _comment)
    print('Empty\t', _empty)
    print('-' * 16)
    print('Code Percentage:\t', '{:.2f}%'.format(code_percentage))
    print('Comment Percentage:\t', '{:.2f}%'.format(comment_percentage))
    print()
if __name__ == "__main__":
    # Each entry: (project root, folder names to skip, extensions to count).
    _projects = (
        ('/Users/username/github/thor-devkit.js', ['node_modules', 'dist', '.git'], ['ts', 'js', 'mjs']),
        ('/Users/username/github/thor-devkit.netcore', ['.git'], ['cs']),
        ('/Users/username/github/thor-devkit.py', ['.pytest_cache', '__pycache__', '.vscode', 'build', 'dist', '.git'], ['py']),
    )
    for _root, _skipped, _wanted in _projects:
        pretty_print(_root, _skipped, _wanted, False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment