# Gist "code tools" by @dotSlashLu (be1097c9152ee61cabd8bb5a67b80af5),
# created April 19, 2024 03:54.
import argparse
import json
import logging
import os
import re
import shlex
import subprocess
import sys
from datetime import datetime
def get_author_stat(path: str, since: str, until: str) -> str:
    """Return a per-author statistics table for the git history under *path*.

    Runs ``git log --numstat`` (excluding ``vendor``) inside *path* and pipes
    it through ``tr``/``grep``/``awk``. The pipeline lower-cases the whole
    log, drops blank lines and lines starting with ``-``, and aggregates per
    author email: number of commits, number of distinct file names,
    insertions, deletions and their sum.

    Args:
        path: Directory in which the pipeline is executed.
        since: Lower bound handed to ``git log --since``.
        until: Upper bound handed to ``git log --until``; omitted when falsy.

    Returns:
        The pipeline's stdout: a header row, a separator row, then one row
        per author ordered by total changed lines (descending).

    Raises:
        RuntimeError: If the shell pipeline exits with a non-zero status.
        OSError: If *path* cannot be used as the working directory.
    """
    # Quote the caller-supplied dates so they cannot break out of the shell
    # command line that is assembled below.
    command = f"git log --since={shlex.quote(since)}"
    if until:
        command += f" --until={shlex.quote(until)}"
    # NOTE: asorti() with "@val_num_desc" is a GNU awk extension, so `awk`
    # has to resolve to gawk for this script to succeed.
    command += r""" --format='author: %ae' --numstat -- . ':!vendor'\
| tr '[A-Z]' '[a-z]' \
| grep -v '^$' \
| grep -v '^-' \
| awk '
{
if ($1 == "author:") {
author = $2;
commits[author]++;
} else {
insertions[author] += $1;
deletions[author] += $2;
total[author] += $1 + $2;
# if this is the first time seeing this file for this
# author, increment their file count
author_file = author ":" $3;
if (!(author_file in seen)) {
seen[author_file] = 1;
files[author]++;
}
}
}
END {
# Print a header
printf("%-30s\t%-10s\t%-10s\t%-10s\t%-10s\t%-10s\n",
"Email", "Commits", "Files",
"Insertions", "Deletions", "Total Lines");
printf("%-30s\t%-10s\t%-10s\t%-10s\t%-10s\t%-10s\n",
"-----", "-------", "-----",
"----------", "---------", "-----------");
# Print the stats for each user, sorted by total lines
n = asorti(total, sorted_emails, "@val_num_desc");
for (i = 1; i <= n; i++) {
email = sorted_emails[i];
printf("%-30s\t%-10s\t%-10s\t%-10s\t%-10s\t%-10s\n",
email, commits[email], files[email],
insertions[email], deletions[email], total[email]);
}
}
'"""
    logging.debug("Running command: %s", command)
    # Run with cwd=path instead of os.chdir() so the process-wide working
    # directory is never modified, even when the command raises.
    # NOTE: the shell reports the exit status of the last pipeline stage
    # (awk), so a failing `git log` yields a header-only table, not an error.
    result = subprocess.run(
        command, shell=True, text=True, capture_output=True, cwd=path
    )
    if result.returncode != 0:
        logging.error("Error in command execution: %s", result.stderr)
        raise RuntimeError(result.stderr)
    return result.stdout
def process_output(dst, output) -> None:
    """Merge one author-statistics table into the running totals in *dst*.

    *output* is expected to look like::

        Email      Commits  Files  Insertions  Deletions  Total Lines
        -----      -------  -----  ----------  ---------  -----------
        xx@ss.ss   5        1454   276236      92913      369149

    Args:
        dst: Mapping of email -> dict with the keys ``commits``, ``files``,
            ``insertions``, ``deletions`` and ``total_lines``. Updated in
            place; unseen emails are added, known emails are summed.
        output: Whitespace-separated table text; its first two lines (header
            and separator) are skipped.

    Raises:
        ValueError: If a numeric column of a six-field row is not an integer.
    """
    for index, line in enumerate(output.splitlines()):
        if index <= 1:
            # Header and separator rows carry no data.
            continue
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 6:
            logging.warning("Skipping malformed stats line: %r", line)
            continue
        email = fields[0]
        commits, files, insertions, deletions, total_lines = map(int, fields[1:])
        # Key order is kept stable so that iterating over the values yields
        # the columns in table order.
        entry = dst.setdefault(email, {
            'commits': 0,
            'files': 0,
            'insertions': 0,
            'deletions': 0,
            'total_lines': 0,
        })
        entry['commits'] += commits
        entry['files'] += files
        entry['insertions'] += insertions
        entry['deletions'] += deletions
        entry['total_lines'] += total_lines
def setup_logging(level) -> None:
    """Configure logging via ``logging.basicConfig`` from a level name.

    Args:
        level: Level name such as ``"debug"`` or ``"WARNING"``; it is
            upper-cased and looked up as an attribute of the ``logging``
            module.

    Raises:
        ValueError: If the looked-up attribute is missing or not an integer.
    """
    resolved = getattr(logging, level.upper(), None)
    if isinstance(resolved, int):
        logging.basicConfig(level=resolved)
        return
    raise ValueError(f'Invalid log level: {level}')
def valid_date(s) -> datetime:
    """Convert a ``YYYY-MM-DD`` string into a ``datetime`` (argparse type).

    Args:
        s: Text to parse with the ``%Y-%m-%d`` format.

    Returns:
        The parsed ``datetime``.

    Raises:
        argparse.ArgumentTypeError: If *s* does not match the format.
    """
    try:
        parsed = datetime.strptime(s, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"Not a valid date: '{s}'. Date format should be YYYY-MM-DD."
        )
    return parsed
def parse_args(argv=None):
    """Parse the command line, configure logging and return the namespace.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``log``, ``from_date``,
        ``to_date``, ``format`` and ``paths``.
    """
    parser = argparse.ArgumentParser(
        description="Aggregate per-author git commit statistics across "
                    "one or more repositories.")
    # Log level argument
    parser.add_argument('--log', metavar='LEVEL', type=str, default='WARNING',
                        help='Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)')
    # From date argument with a default value
    parser.add_argument('--since', dest='from_date', type=valid_date,
                        default=datetime(2024, 1, 1),
                        help='Start date YYYY-MM-DD (default: 2024-01-01)')
    # To date argument without a default value
    parser.add_argument('--until', dest='to_date', type=valid_date, default=None,
                        help='End date YYYY-MM-DD (default: None)')
    # Output format argument
    parser.add_argument('--format', dest='format', type=str, default='json',
                        help='set the output format(json, table, markdown)')
    # Positional paths. '*' is used instead of argparse.REMAINDER so that
    # options written after the paths (e.g. `repo --format table`) are still
    # parsed as options rather than being collected as paths.
    parser.add_argument('paths', nargs='*',
                        help='All other arguments that are not named specifically.')
    args = parser.parse_args(argv)
    setup_logging(args.log)
    # Log the parsed arguments to show how they were captured
    logging.debug("Log Level: %s", args.log)
    logging.debug("From Date: %s", args.from_date.strftime("%Y-%m-%d") if args.from_date else "None")
    logging.debug("To Date: %s", args.to_date.strftime("%Y-%m-%d") if args.to_date else "None")
    logging.debug("Additional arguments: %s", args.paths)
    return args
def _repo_dirs(path):
    """Return [path] if it contains a .git directory, else its sub-directories."""
    if os.path.isdir(os.path.join(path, '.git')):
        logging.debug('%s is a git repo', path)
        return [path]
    found = []
    for name in os.listdir(path):
        candidate = os.path.join(path, name)
        if os.path.isdir(candidate):
            found.append(candidate)
        else:
            logging.debug('%s is not a directory', candidate)
    return found


def _render_report(dst, fmt):
    """Render *dst* as 'json' (or ''), 'table' or 'markdown'; None if unknown."""
    if fmt in ('', 'json'):
        return json.dumps(dst)
    # Both tabular formats list authors by commit count, highest first.
    ranked = sorted(dst.items(), key=lambda item: item[1]['commits'], reverse=True)
    if fmt == 'table':
        lines = ["Email\tCommits\tFiles\tInsertions\tDeletions\tTotal Lines"]
        lines.extend(
            '\t'.join([email, *map(str, stats.values())]) for email, stats in ranked
        )
        return '\n'.join(lines)
    if fmt == 'markdown':
        lines = [
            "| Email | Commits | Files | Insertions | Deletions | Total Lines |",
            "| --- | --- | --- | --- | --- | --- |",
        ]
        lines.extend(
            '| ' + ' | '.join([email, *map(str, stats.values())]) + ' |'
            for email, stats in ranked
        )
        return '\n'.join(lines)
    return None


def main() -> None:
    """Collect author statistics for every given path and print the report.

    Exits with status 1 when a path cannot be scanned or a statistics run
    fails, and with status 2 when the requested output format is unknown.
    """
    args = parse_args()
    since = args.from_date.strftime("%Y-%m-%d") if args.from_date else "2024-01-01"
    until = args.to_date.strftime("%Y-%m-%d") if args.to_date else datetime.now().strftime("%Y-%m-%d")
    dst = {}
    for path in args.paths:
        try:
            repos = _repo_dirs(path)
        except OSError as err:
            logging.error("Cannot scan %s: %s", path, err)
            sys.exit(1)
        for repo in repos:
            try:
                out = get_author_stat(repo, since, until)
            except Exception as err:  # top-level boundary: report and stop
                logging.error(err)
                sys.exit(1)
            process_output(dst, out)
    report = _render_report(dst, args.format)
    if report is None:
        logging.error("Unknown output format: %r (expected json, table or markdown)",
                      args.format)
        sys.exit(2)
    print(report)


if __name__ == '__main__':
    main()
# GitHub page footer (not part of the script): Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment