Skip to content

Instantly share code, notes, and snippets.

@jsonzilla
Last active May 3, 2023 13:07
Show Gist options
  • Save jsonzilla/85086800ddc9c77c7b816092e4005742 to your computer and use it in GitHub Desktop.
Save jsonzilla/85086800ddc9c77c7b816092e4005742 to your computer and use it in GitHub Desktop.
Simple LOC

Simple LOC

This script counts the lines of code in a C++ codebase. It reads files with the extensions .cpp, .h, .hpp and .cmake, as well as files named CMakeList.txt, and ignores blank lines and comments. It can also skip certain folders by matching their names against a pattern.

To use this script, you need to have Python 3.7+ installed on your system. You can run the script from the command line by navigating to the directory where the script is located and running the following command:

python script.py path/to/codebase

Where path/to/codebase is the path to the codebase you want to count lines of code for.

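Folders whose names match a pattern in the exclude_folders variable are skipped; as shipped, that list contains "Components" and "folder2". To change which folders are excluded from the line count, edit the exclude_folders variable in the script. Each entry is matched against the folder name as a shell-style wildcard pattern, so an empty list excludes nothing.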

The script will output the total number of lines of code in a readable format, displaying the number of lines in millions and hundreds.

If there is an error reading a file as utf-8, the script will try to read it again using the ansi encoding.

import os
import sys
import fnmatch
import re
# One left-to-right scan over C/C++ source that recognises string literals,
# character literals, line comments and block comments. Literals are matched
# only so that a "//" or "/*" inside them is not mistaken for a comment, and so
# that whichever comment style opens first is the one that wins.
_CPP_TOKEN_RE = re.compile(
    r'"(?:\\.|[^"\\\n])*"'    # string literal (with escapes)
    r"|'(?:\\.|[^'\\\n])*'"   # character literal (with escapes)
    r"|//[^\n]*"              # line comment, up to the end of the line
    r"|/\*.*?\*/",            # block comment, may span several lines
    re.DOTALL,
)

# A CMake line that holds nothing but an (optionally indented) comment.
_CMAKE_COMMENT_LINE_RE = re.compile(r"^[ \t]*#.*", re.MULTILINE)

# The historical misspelling is kept so existing callers keep working.
_CMAKE_FILE_NAMES = ("CMakeList.txt", "CMakeLists.txt")


def _drop_comment_keep_literal(match):
    """Return "" for a comment token and the unchanged text for a literal."""
    token = match.group(0)
    return "" if token.startswith("/") else token


def remove_comments(code, file):
    """Return *code* with comments removed, based on the type of *file*.

    Args:
        code: Full text of the source file.
        file: File name or path; only its final component decides which
            comment syntax applies.

    Returns:
        For ``.cpp``/``.h``/``.hpp`` files, the text without ``//`` and
        ``/* ... */`` comments (string and character literals are left
        intact). For ``.cmake`` files and CMake list files, the text with
        comment-only ``#`` lines emptied. Any other file is returned unchanged.
    """
    # Callers pass full paths, so compare against the base name only.
    name = os.path.basename(file)
    if name.endswith((".cpp", ".h", ".hpp")):
        return _CPP_TOKEN_RE.sub(_drop_comment_keep_literal, code)
    if name.endswith(".cmake") or name in _CMAKE_FILE_NAMES:
        return _CMAKE_COMMENT_LINE_RE.sub("", code)
    return code
def _read_source_text(file_path):
    """Return the decoded text of *file_path*, trying several encodings.

    UTF-8 is tried first ("utf-8-sig" so a leading byte-order mark does not
    end up in the text), then the "ansi" code page. "ansi" only exists as a
    codec on Windows, so an unknown-codec LookupError is treated like a
    decoding failure. Latin-1 is the last resort because it maps every byte
    and therefore cannot fail to decode.
    """
    for encoding in ("utf-8-sig", "ansi"):
        try:
            with open(file_path, "r", encoding=encoding) as f:
                return f.read()
        except (UnicodeDecodeError, LookupError):
            continue
    with open(file_path, "r", encoding="latin-1") as f:
        return f.read()


def count_lines_in_file(file_path):
    """Count the non-blank, non-comment lines in one source file.

    Args:
        file_path: Path of the file to read.

    Returns:
        Number of lines that still contain non-whitespace text after
        ``remove_comments`` has been applied to the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    code = remove_comments(_read_source_text(file_path), file_path)
    return sum(1 for line in code.split("\n") if line.strip())
def count_lines(path, exclude_folders):
    """Sum the lines of code of every C++/CMake source file under *path*.

    Args:
        path: Root directory to walk.
        exclude_folders: Iterable of ``fnmatch`` patterns; any directory whose
            name matches one of them is skipped together with its contents.

    Returns:
        Total of ``count_lines_in_file`` over all files ending in ``.cpp``,
        ``.h``, ``.hpp`` or ``.cmake`` and all CMake list files.
    """
    source_suffixes = (".cpp", ".h", ".hpp", ".cmake")
    # CMake's list file is "CMakeLists.txt"; the misspelled name the script
    # used to look for is still accepted for backward compatibility.
    cmake_list_names = ("CMakeList.txt", "CMakeLists.txt")

    total_lines = 0
    for root, dirs, files in os.walk(path):
        # Assign in place so os.walk does not descend into excluded folders.
        dirs[:] = [
            d for d in dirs
            if not any(fnmatch.fnmatch(d, pattern) for pattern in exclude_folders)
        ]
        for file in files:
            if file.endswith(source_suffixes) or file in cmake_list_names:
                total_lines += count_lines_in_file(os.path.join(root, file))
    return total_lines
def format_output(total_lines):
    """Render a line count as whole millions plus whole hundreds.

    Args:
        total_lines: The number of lines to describe.

    Returns:
        ``"<M> million <H> hundred lines"`` when there is at least one full
        million, otherwise ``"<H> hundred lines"``. ``H`` is the part below
        one million divided by 100 and rounded down, so anything under 100
        lines is dropped.
    """
    millions, below_million = divmod(total_lines, 1000000)
    hundreds = below_million // 100
    if millions <= 0:
        return f"{hundreds} hundred lines"
    return f"{millions} million {hundreds} hundred lines"
if __name__ == "__main__":
    # Exit with a usage message rather than an IndexError traceback when the
    # codebase path is not given.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python {sys.argv[0]} path/to/codebase")
    path = sys.argv[1]
    # os.walk yields nothing for a path that is not a directory, which would
    # otherwise be reported as a codebase with zero lines.
    if not os.path.isdir(path):
        sys.exit(f"Error: '{path}' is not a directory")
    exclude_folders = ["Components", "folder2"]  # add the folders you want to exclude here
    total_lines = count_lines(path, exclude_folders)
    print(f"Total lines of code: {format_output(total_lines)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment