Skip to content

Instantly share code, notes, and snippets.

@nachouve
Last active May 15, 2023 08:16
Show Gist options
  • Save nachouve/09da4eaac5c2fff2fff74d5f7debeba9 to your computer and use it in GitHub Desktop.
Save nachouve/09da4eaac5c2fff2fff74d5f7debeba9 to your computer and use it in GitHub Desktop.
Git Log Parser - Parse and summarize the output of a `git log --name-status` command.
"""
Git Log Parser - Version 0.1
Parse and summarize the output of a `git log --name-status` command.
Extracts information about commits, authors, and file modifications,
and then summarizes this information in tables that show the number of commits,
files updated, and lines modified by author and by date.
The code uses classes to represent commits and file modifications,
and functions to parse the output of the git log command and summarize the data.
The rich library is used to format and display the summary tables in the console.
This code provides a useful tool for analyzing the history of a Git repository and
understanding the contributions of different authors over time.
Classes:
- Commit: Represents a single commit in a Git repository.
- FileModification: Represents a modification to a single file in a commit.
- AuthorSummary: Represents a summary of commits by a single author.
Usage example:
python summarize_git_log.py <file_with_gitlog.txt>
MIT License
Copyright (c) 2023 Nacho Varela
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Author: nachouve
Date: 2023-05-06
Repo: https://gist.github.com/nachouve/09da4eaac5c2fff2fff74d5f7debeba9/
"""
import datetime
import sys
from rich.console import Console
from rich.table import Table
# Shared rich console used for the styled output of this script.
console = Console()
# File-name suffixes whose changes are counted. The value is passed directly
# to str.endswith(), so it must stay a tuple of strings.
ONLY_COUNT_FILES_EXTENSIONS = ('.py', '.html', '.css')
# These are top-level statements, so the two notices below are printed as soon
# as the module is executed or imported, before any log is parsed.
console.print(f"Warning: only count files with extensions: {ONLY_COUNT_FILES_EXTENSIONS}", style="red")
console.print("Modify 'ONLY_COUNT_FILES_EXTENSIONS' variable to specify others")
class FileModification:
    """One file touched by a commit.

    Stores the file name and the status token it was reported with, plus
    three line counters (modified, added, deleted) that start at zero and
    are meant to be overwritten by whoever creates the instance.
    """

    def __init__(self, filename, modification_type):
        """Record the file name and its status token; zero all counters."""
        self.filename, self.modification_type = filename, modification_type
        self.lines_modified = self.lines_added = self.lines_deleted = 0

    def __repr__(self):
        """Return a short debugging representation of the modification."""
        text = f"<FileModification filename={self.filename} modification_type={self.modification_type} lines_modified={self.lines_modified}>"
        return text
class Commit:
    """A single commit: id, author, date, comment and the files it touched.

    Touched files are kept in three separate lists, one each for added,
    modified and deleted files.
    """

    def __init__(self, commit_id, author, date, comment):
        """Store the commit metadata and start with empty file lists."""
        self.id, self.author = commit_id, author
        self.date, self.comment = date, comment
        self.added_files, self.modified_files, self.deleted_files = [], [], []

    def add_file_modification(self, filename, modification_type, lines_modified=0, lines_added=0, lines_deleted=0):
        """Create a FileModification and file it under its status.

        Only the exact status tokens 'A', 'M' and 'D' are stored; any other
        token (for example a rename score such as 'R100') is ignored.
        """
        entry = FileModification(filename, modification_type)
        entry.lines_modified, entry.lines_added, entry.lines_deleted = (
            lines_modified,
            lines_added,
            lines_deleted,
        )
        destinations = (
            ('A', self.added_files),
            ('M', self.modified_files),
            ('D', self.deleted_files),
        )
        for status, bucket in destinations:
            if modification_type == status:
                bucket.append(entry)
                break

    def __repr__(self):
        """Return a short debugging representation of the commit."""
        text = f"<Commit id={self.id} author={self.author} date={self.date} comment={self.comment}>"
        return text
class AuthorSummary:
    """Per-author commit counters, bucketed by day, week and month.

    Each counter is a plain dict that starts empty; the caller decides
    which keys to use and fills them in.
    """

    def __init__(self, author):
        """Remember the author and create the three empty counters."""
        self.author = author
        self.commits_by_day, self.commits_by_week, self.commits_by_month = {}, {}, {}

    def __repr__(self):
        """Return a short debugging representation of the summary."""
        text = f"<AuthorSummary author={self.author}>"
        return text
def _commit_id_from_header(line):
    """Return the hash from a ``commit <hash> ...`` header line, else None.

    The hash is the second token, so trailing decorations such as
    ``(HEAD -> main)`` are ignored. Requiring a hexadecimal token keeps a
    message line such as "commit hooks added" from being taken for a header.
    """
    tokens = line.split()
    if len(tokens) < 2 or tokens[0] != "commit":
        return None
    candidate = tokens[1]
    if all(ch in "0123456789abcdefABCDEF" for ch in candidate):
        return candidate
    return None


def _parse_status_line(line):
    """Return ``(status, path)`` for a ``--name-status`` entry, else None.

    A status token is one of A, M, D, R or C, optionally followed by a
    numeric score (e.g. ``R100``). For entries that list two paths, the
    first path is returned.
    """
    # git separates status and path(s) with tabs, which keeps paths that
    # contain spaces intact. Fall back to whitespace splitting for logs whose
    # tabs were converted to spaces (e.g. by copy and paste).
    fields = line.split("\t") if "\t" in line else line.split()
    if len(fields) < 2:
        return None
    status = fields[0].strip()
    filename = fields[1].strip()
    if not status or not filename:
        return None
    if status[0] not in ('A', 'M', 'D', 'R', 'C'):
        return None
    if status[1:] and not status[1:].isdigit():
        return None
    return status, filename


def parse_git_log(lines):
    """Parse the lines of a ``git log --name-status`` output into commits.

    The parser alternates between two states. In "comment" it reads the
    commit header (``commit``, ``Author:``, ``Date:``, ``Merge:``) and the
    first message line; in "files_modified" it reads status entries. Any
    line in "files_modified" that is not a status entry is handed back to
    the header/message handling instead of being discarded, so a commit
    header is never lost after a multi-paragraph message or after a commit
    without a file list.

    Args:
        lines: Iterable of text lines; surrounding whitespace is ignored.

    Returns:
        A list of Commit objects in the order they appear in the input.

    Raises:
        ValueError: If a ``Date:`` line is not in the
            ``%a %b %d %H:%M:%S %Y %z`` format.
    """
    commits = []
    commit = None
    state = "comment"

    for line in lines:
        line = line.strip()
        if not line:
            # Blank lines only separate sections; they carry no data.
            continue

        if state == "files_modified":
            entry = _parse_status_line(line)
            if entry is not None:
                modification_type, filename = entry
                lines_modified = lines_added = lines_deleted = 0
                # Line statistics are only gathered for the configured
                # extensions and only for added or modified files.
                if filename.endswith(ONLY_COUNT_FILES_EXTENSIONS) and modification_type in ('M', 'A'):
                    lines_modified, lines_added, lines_deleted = count_lines_modified(filename, commit.id)
                commit.add_file_modification(filename, modification_type, lines_modified, lines_added, lines_deleted)
                continue
            # Not a status entry: re-examine it below as header or message.
            state = "comment"

        commit_id = _commit_id_from_header(line)
        if commit_id is not None:
            if commit is not None:
                commits.append(commit)
            commit = Commit(commit_id, "", None, "")
            state = "comment"
        elif commit is None:
            # Ignore anything that precedes the first commit header.
            continue
        elif line.startswith("Author:"):
            commit.author = line[len("Author:"):].strip()
        elif line.startswith("Date:"):
            date_str = line[len("Date:"):].strip()
            commit.date = datetime.datetime.strptime(date_str, "%a %b %d %H:%M:%S %Y %z")
        elif line.startswith("Merge:"):
            # The parent list of a merge commit is not used.
            pass
        else:
            # Keep the first message line (the subject) as the comment.
            if not commit.comment:
                commit.comment = line
            state = "files_modified"

    if commit is not None:
        commits.append(commit)
    return commits
def count_lines_modified(filename, commit_id):
    """Count the lines a commit added to and deleted from one file.

    Runs ``git diff --numstat`` between the commit and its first parent.
    If that command fails (a root commit has no ``<commit>~1``), it falls
    back to ``git show --numstat`` on the commit itself.

    Args:
        filename: Path of the file as listed by git.
        commit_id: Hash of the commit to inspect.

    Returns:
        A tuple ``(lines_modified, lines_added, lines_deleted)`` where
        ``lines_modified`` is the sum of the other two.

    Raises:
        Exception: Whatever ``run_command`` raises when the fallback
            command fails as well.
    """
    try:
        output = run_command(["git", "diff", f"{commit_id}~1..{commit_id}", "--numstat", "--", filename])
    except Exception:
        # Broad on purpose: run_command signals failure with a generic
        # exception. If the fallback fails too, its error propagates.
        output = run_command(["git", "show", "--numstat", "--format=", commit_id, "--", filename])

    lines_added = 0
    lines_deleted = 0
    for line in output.strip().splitlines():
        fields = line.strip().split("\t", 2)
        if len(fields) < 3:
            continue
        added, deleted = fields[0], fields[1]
        # Binary files are reported as "-"; they contribute no line counts.
        if added.isdigit():
            lines_added += int(added)
        if deleted.isdigit():
            lines_deleted += int(deleted)
    return lines_added + lines_deleted, lines_added, lines_deleted
def summarize_commits_by_author(commits):
    """Build one AuthorSummary per author and count their commits.

    For every commit that has a date, the author's counters are bumped
    for the commit's calendar day, for the Monday of its week and for the
    first day of its month. Commits without a date still create the
    author's summary but are not counted.

    Returns:
        A dict mapping each author string to its AuthorSummary.
    """
    summaries = {}
    for entry in commits:
        name = entry.author
        if name not in summaries:
            summaries[name] = AuthorSummary(name)
        if entry.date is None:
            continue
        summary = summaries[name]
        day = entry.date.date()
        monday = day - datetime.timedelta(days=day.weekday())
        month_start = day.replace(day=1)
        buckets = (
            (summary.commits_by_day, day),
            (summary.commits_by_week, monday),
            (summary.commits_by_month, month_start),
        )
        for counter, key in buckets:
            increment_count(counter, key)
    return summaries
def summarize_files_modified_by_commit(commits):
    """Print one line per added or modified file of every commit.

    Added files are listed before modified files for each commit; deleted
    files are not reported.
    """
    for commit in commits:
        touched = [*commit.added_files, *commit.modified_files]
        for change in touched:
            message = (
                f"Commit '{commit.id}': '{change.filename}'"
                f" was {change.modification_type}"
                f" with {change.lines_modified} lines modified"
            )
            print(message)
def increment_count(dictionary, key):
    """Add one to ``dictionary[key]``, treating a missing key as zero."""
    dictionary[key] = dictionary.get(key, 0) + 1
def run_command(command_args):
    """Run an external command and return its standard output as text.

    Failure is decided by the exit status, not by the presence of output
    on standard error, so warnings printed by a successful command do not
    abort the run.

    Args:
        command_args: Program and arguments as a list (no shell is used).

    Returns:
        The decoded standard output with surrounding whitespace removed.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            message is the command's standard error, or a generic message
            naming the command when standard error is empty.
    """
    import subprocess

    completed = subprocess.run(command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if completed.returncode != 0:
        details = completed.stderr.decode(errors="replace").strip()
        if not details:
            details = f"Command {command_args!r} exited with status {completed.returncode}"
        raise RuntimeError(details)
    # Undecodable bytes are replaced rather than raising.
    return completed.stdout.decode(errors="replace").strip()
def main():
    """Read a git log, parse it and print one summary table per author.

    The log is read from the file named by the single command-line
    argument, or from standard input when no such argument is given. Each
    author's table has one row per day on which that author committed.
    """
    if len(sys.argv) == 2:
        # Close the file deterministically. git writes its log as UTF-8 by
        # default; undecodable bytes are replaced instead of aborting.
        with open(sys.argv[1], 'r', encoding="utf-8", errors="replace") as log_file:
            git_log_output = log_file.read()
    else:
        print("Copy here a 'git log --name-status' output, redirect here a PIPE input or indicate a filepath when launching the script.")
        git_log_output = sys.stdin.read()

    commits = parse_git_log(git_log_output.splitlines())
    author_summaries = summarize_commits_by_author(commits)

    for author_summary in author_summaries.values():
        table = Table(show_header=True, header_style="bold magenta")
        table.add_column("Date", style="cyan")
        table.add_column("# Commits", justify="right", style="green")
        table.add_column("# Files Updated", justify="right", style="magenta")
        table.add_column("# Lines Modified", justify="right", style="cyan")
        table.add_column("# Lines Added", justify="right", style="blue")
        table.add_column("# Lines Deleted", justify="right", style="red")

        for date, num_commits in author_summary.commits_by_day.items():
            num_updated_files, num_modified_lines, num_added_lines, num_deleted_lines = count_author_metrics(date, author_summary, commits)
            table.add_row(
                date.strftime('%Y/%m/%d'),
                str(num_commits),
                str(num_updated_files),
                str(num_modified_lines),
                str(num_added_lines),
                str(num_deleted_lines),
            )

        console.print(f"\n[bold underline magenta]Author: {author_summary.author}[/bold underline magenta]")
        console.print(table)
def count_author_metrics(date, author_summary, commits):
    """Total one author's file and line counts for a single day.

    Args:
        date: The calendar day to match against each commit's date.
        author_summary: Summary whose ``author`` selects the commits.
        commits: All parsed commits.

    Returns:
        A tuple ``(files, lines_modified, lines_added, lines_deleted)``
        covering the added, modified and deleted files whose names end
        with one of ``ONLY_COUNT_FILES_EXTENSIONS``.
    """
    num_updated_files = 0
    num_modified_lines = 0
    num_added_lines = 0
    num_deleted_lines = 0
    for commit in commits:
        # A commit without a parsed date cannot belong to any day; skipping
        # it avoids calling .date() on None.
        if commit.date is None:
            continue
        if commit.author != author_summary.author or commit.date.date() != date:
            continue
        for file_modification in commit.added_files + commit.modified_files + commit.deleted_files:
            if file_modification.filename.endswith(ONLY_COUNT_FILES_EXTENSIONS):
                num_updated_files += 1
                num_modified_lines += file_modification.lines_modified
                num_added_lines += file_modification.lines_added
                num_deleted_lines += file_modification.lines_deleted
    return num_updated_files, num_modified_lines, num_added_lines, num_deleted_lines
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment