Skip to content

Instantly share code, notes, and snippets.

@laalaguer
Last active July 15, 2020 06:36
Show Gist options
  • Save laalaguer/c001f6cbb82395399b23a15898046ca6 to your computer and use it in GitHub Desktop.
Save laalaguer/c001f6cbb82395399b23a15898046ca6 to your computer and use it in GitHub Desktop.
Count the code, comment, and empty lines in a project
# Python 3 Script
# Count the "real" code lines (exclude empty lines, and comments) in a project.
# Languages supported: Python, JavaScript, C, C++, Java, C#, etc....
# Analyse:
# A source code file contains code, and comments.
# Single line comments are easy. Just // or #
# Multi-line comments usually begin with /* and end with */; this state
# can be tracked with a stack (or a simple flag). /* // */ counts as a single block of comments.
# In Python they begin with ''' or """, and end with three quotes too.
# Good news is: the total lines = code + comments + empty lines.
# So if we can count the empty lines AND comments,
# Then we can calculate the code lines.
import os
from typing import List, Tuple
# Extensions whose comments use // and /* ... */.
C_FAMILY = [
    'js', 'ts', 'mjs',
    'c', 'cpp',
    'java',
    'cs',
]
# Extensions whose comments use # and triple quotes.
PYTHON_FAMILY = [
    'py',
]
# Every extension the comment predicates in this file accept.
EXTENSIONS = [*C_FAMILY, *PYTHON_FAMILY]
def _lstrip(this_line: str) -> str:
''' Strip the leading whitespaces off the line '''
return this_line.lstrip()
def _rstrip(this_line: str) -> str:
''' Strip the tailing whitespaces off the line '''
return this_line.rstrip()
def _strip(this_line: str) -> str:
''' Strip on the head and tail off the line '''
return _lstrip(_rstrip(this_line))
def _get_ext(path_name: str):
path_name = _strip(path_name)
if not ('.' in path_name):
raise ValueError('path does not contain ., no extension?')
ext = path_name.split('.')[-1]
return ext
def _is_empty_line(ext: str, this_line: str) -> bool:
''' Judge if this line is empty or only with white spaces.'''
this_line = _strip(this_line)
if this_line == '':
return True
return False
def _is_begin_multi_comments(ext: str, this_line: str) -> bool:
    ''' Tell whether this_line opens a multi-line comment.

    After stripping surrounding whitespace, a Python-family line must start
    with three single or three double quotes; a C-family line must start
    with "/*".

    Raises ValueError when ext belongs to neither family.
    '''
    this_line = this_line.strip()
    if ext in PYTHON_FAMILY:
        # startswith accepts a tuple: one call covers both quote styles.
        return this_line.startswith(("'''", '"""'))
    if ext in C_FAMILY:
        return this_line.startswith('/*')
    raise ValueError('file with extension: ' + ext + ' not supported!')
def _is_end_multi_comments(ext: str, this_line: str) -> bool:
    ''' Tell whether this_line closes a multi-line comment.

    After stripping surrounding whitespace, a Python-family line must end
    with three single or three double quotes; a C-family line must end
    with "*/".

    Raises ValueError when ext belongs to neither family.
    '''
    this_line = this_line.strip()
    if ext in PYTHON_FAMILY:
        # endswith accepts a tuple: one call covers both quote styles.
        return this_line.endswith(("'''", '"""'))
    if ext in C_FAMILY:
        return this_line.endswith('*/')
    raise ValueError('file with extension: ' + ext + ' not supported!')
def _is_single_comment_line(ext: str, this_line: str) -> bool:
    ''' Tell whether this_line is, on its own, one complete comment.

    That is either a line comment ("#" for the Python family, "//" for the
    C family) or a multi-line comment that is opened and closed on the
    same line.

    Raises ValueError when ext belongs to neither family.
    '''
    this_line = this_line.strip()
    if ext in PYTHON_FAMILY:
        line_marker = '#'
        # Three quotes to open plus three quotes to close.
        min_block_len = 6
    elif ext in C_FAMILY:
        line_marker = '//'
        # "/*" plus "*/".
        min_block_len = 4
    else:
        raise ValueError('file with extension: ' + ext + ' not supported!')

    if this_line.startswith(line_marker):
        return True
    # The length guard keeps a bare delimiter (a lone ''' or """ line, or
    # "/*/") from matching as both opener and closer at once. Such a line
    # only opens (or closes) a block; it is not a complete comment.
    return (
        len(this_line) >= min_block_len
        and _is_begin_multi_comments(ext, this_line)
        and _is_end_multi_comments(ext, this_line)
    )
def _read_a_file(abs_path: str) -> List[str]:
''' Read a file, get its lines of string '''
with open(abs_path, 'r') as f:
lines = []
lines = f.readlines()
return lines
def detect_file(ext: str, abs_path: str) -> Tuple[int, int, int, int]:
    ''' Count the total, code, comment and empty lines of one file.

    ext selects the comment syntax and is passed to the line predicates;
    abs_path is the file to read.

    Returns (total_lines, code_lines, comment_lines, empty_lines), where
    code_lines is whatever is left after removing empty and comment lines.
    Every line inside a multi-line comment, blank ones included, counts as
    a comment line.

    Raises ValueError when abs_path is not a file, or when a line outside
    any comment block ends with a block terminator without starting a
    comment. The line predicates raise ValueError for an unsupported ext.
    '''
    if not os.path.isfile(abs_path):
        raise ValueError('path must point to a file...')

    lines = _read_a_file(abs_path)
    total_lines = len(lines)
    comment_lines = 0
    empty_lines = 0
    in_comments_block_flag = False

    # Count from 1 so the error message names the line as an editor shows it.
    for line_no, line in enumerate(lines, start=1):
        if in_comments_block_flag:
            comment_lines += 1
            if _is_end_multi_comments(ext, line):
                in_comments_block_flag = False
            continue

        if _is_empty_line(ext, line):
            empty_lines += 1
        elif _is_single_comment_line(ext, line):
            comment_lines += 1
        elif _is_begin_multi_comments(ext, line):
            in_comments_block_flag = True
            comment_lines += 1
        elif _is_end_multi_comments(ext, line):
            # The number is formatted, not concatenated: str + int would
            # raise TypeError and hide this ValueError.
            raise ValueError(
                'not in comments block but see a end of block sign. '
                + abs_path + ' :' + str(line_no)
            )

    code_lines = total_lines - empty_lines - comment_lines
    return total_lines, code_lines, comment_lines, empty_lines
def summarize(abs_dir_path: str, exclude_abs_folders: List, include_extensions: List, debug: bool = False) -> Tuple[int, int, int, int]:
    '''
    Recursively count the lines of the files under abs_dir_path.

    exclude_abs_folders holds bare folder names (not paths); a folder whose
    name is listed is skipped at whatever depth it is found, because the
    same list is handed down to every recursive call.

    include_extensions holds the extensions to count, without the dot.
    File extensions are lower-cased before the lookup, so list them in
    lower case.

    When debug is true, the counts of every visited file and folder, and
    every skipped folder, are printed.

    Returns (total_lines, code_lines, comment_lines, empty_lines).

    Raises ValueError when abs_dir_path is not a directory, or when an
    entry is neither a file nor a directory.
    '''
    if not os.path.isdir(abs_dir_path):
        raise ValueError('abs_dir_path should be a dir.')

    total_lines = 0
    code_lines = 0
    comment_lines = 0
    empty_lines = 0

    for item in os.listdir(abs_dir_path):
        item_abs_path = os.path.join(abs_dir_path, item)

        if os.path.isdir(item_abs_path):
            if item in exclude_abs_folders:
                if debug:
                    print(item_abs_path, 'ignored.')
                continue
            counts = summarize(item_abs_path, exclude_abs_folders, include_extensions, debug)
        elif os.path.isfile(item_abs_path):
            try:
                ext = _get_ext(item_abs_path).lower()
            except ValueError:
                # No extension at all (e.g. "Makefile"): it cannot be in
                # include_extensions, so skip it instead of aborting.
                continue
            if ext not in include_extensions:
                continue
            counts = detect_file(ext, item_abs_path)
        else:
            raise ValueError(item_abs_path + ' is not a file or dir.')

        _total, _code, _comment, _empty = counts
        if debug:
            print(_total, _code, _comment, _empty, item_abs_path)
        total_lines += _total
        code_lines += _code
        comment_lines += _comment
        empty_lines += _empty

    return total_lines, code_lines, comment_lines, empty_lines
def pretty_print(abs_dir_path: str, exclude_abs_folders: List, include_extensions: List, debug: bool = False) -> None:
    ''' Summarize abs_dir_path and print the counts and percentages.

    All four arguments are passed unchanged to summarize(). The code and
    comment percentages are relative to the total line count; when no line
    was counted they are printed as 0.00% instead of dividing by zero.
    '''
    _total, _code, _comment, _empty = summarize(abs_dir_path, exclude_abs_folders, include_extensions, debug)

    # Nothing matched (empty folder, or no file with a listed extension).
    if _total:
        code_percentage = _code / _total * 100
        comment_percentage = _comment / _total * 100
    else:
        code_percentage = 0.0
        comment_percentage = 0.0

    print('Summary:', abs_dir_path)
    print('-' * (len('Summary: ') + len(abs_dir_path)))
    print('Total\t', _total)
    print('Code\t', _code)
    print('Comment\t', _comment)
    print('Empty\t', _empty)
    print('-' * 16)
    print('Code Percentage:\t', '{:.2f}%'.format(code_percentage))
    print('Comment Percentage:\t', '{:.2f}%'.format(comment_percentage))
    print()
if __name__ == "__main__":
    # Example runs. Each call passes: the project folder, the folder names
    # to skip, the extensions to count, and the debug switch.
    # The project paths are hard-coded; edit them before running.
    pretty_print('/Users/username/github/thor-devkit.js', ['node_modules', 'dist', '.git'], ['ts', 'js', 'mjs'], False)
    pretty_print('/Users/username/github/thor-devkit.netcore', ['.git'], ['cs'], False)
    pretty_print('/Users/username/github/thor-devkit.py', ['.pytest_cache', '__pycache__', '.vscode', 'build', 'dist', '.git'], ['py'], False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment