Last active
May 15, 2023 08:16
-
-
Save nachouve/09da4eaac5c2fff2fff74d5f7debeba9 to your computer and use it in GitHub Desktop.
Git Log Parser - Parse and summarize the output of a `git log --name-status` command.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Git Log Parser - Version 0.1 | |
Parse and summarize the output of a `git log --name-status` command.
Extract information about commits, authors, and file modifications,
and then summarize this information in tables that show the number of commits,
files updated, and lines modified by author and by date.
The code uses classes to represent commits and file modifications, | |
and functions to parse the output of the git log command and summarize the data. | |
The rich library is used to format and display the summary tables in the console. | |
This code provides a useful tool for analyzing the history of a Git repository and | |
understanding the contributions of different authors over time. | |
Classes: | |
- Commit: Represents a single commit in a Git repository. | |
- FileModification: Represents a modification to a single file in a commit. | |
- AuthorSummary: Represents a summary of commits by a single author. | |
Usage example: | |
python summarize_git_log.py <file_with_gitlog.txt> | |
MIT License | |
Copyright (c) 2023 Nacho Varela | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
Author: nachouve | |
Date: 2023-05-06 | |
Repo: https://gist.github.com/nachouve/09da4eaac5c2fff2fff74d5f7debeba9/ | |
""" | |
import datetime | |
import sys | |
from rich.console import Console | |
from rich.table import Table | |
# File-name suffixes that count towards the per-file and per-line statistics.
ONLY_COUNT_FILES_EXTENSIONS = ('.py', '.html', '.css')

# Shared rich console; the two notices below are printed as soon as the
# module is loaded so the user knows which files are being counted.
console = Console()
console.print(
    f"Warning: only count files with extensions: {ONLY_COUNT_FILES_EXTENSIONS}",
    style="red",
)
console.print("Modify 'ONLY_COUNT_FILES_EXTENSIONS' variable to specify others")
class FileModification:
    """One file touched by a commit, together with its line-change counters."""

    def __init__(self, filename, modification_type):
        # Path and status code are stored exactly as handed in by the caller.
        self.filename, self.modification_type = filename, modification_type
        # Counters start at zero; callers overwrite them after construction.
        self.lines_modified = self.lines_added = self.lines_deleted = 0

    def __repr__(self):
        return (
            f"<FileModification filename={self.filename}"
            f" modification_type={self.modification_type}"
            f" lines_modified={self.lines_modified}>"
        )
class Commit:
    """A single commit plus the files it added, modified or deleted."""

    def __init__(self, commit_id, author, date, comment):
        self.id = commit_id
        self.author = author
        self.date = date
        self.comment = comment
        # One bucket per status letter handled by add_file_modification().
        self.added_files, self.modified_files, self.deleted_files = [], [], []

    def add_file_modification(self, filename, modification_type, lines_modified=0, lines_added=0, lines_deleted=0):
        """Record a change to *filename* in the bucket matching its status.

        A ``FileModification`` is built from the arguments and appended to
        ``added_files`` ('A'), ``modified_files`` ('M') or ``deleted_files``
        ('D').  Any other *modification_type* is not stored anywhere.
        """
        change = FileModification(filename, modification_type)
        change.lines_modified = lines_modified
        change.lines_added = lines_added
        change.lines_deleted = lines_deleted

        buckets = {
            'A': self.added_files,
            'M': self.modified_files,
            'D': self.deleted_files,
        }
        target = buckets.get(modification_type)
        # Compare against None: an empty bucket is falsy but still valid.
        if target is not None:
            target.append(change)

    def __repr__(self):
        return (
            f"<Commit id={self.id} author={self.author}"
            f" date={self.date} comment={self.comment}>"
        )
class AuthorSummary:
    """Per-author commit counters bucketed by day, week and month."""

    def __init__(self, author):
        self.author = author
        # Three independent mappings, all empty until a caller fills them.
        self.commits_by_day, self.commits_by_week, self.commits_by_month = {}, {}, {}

    def __repr__(self):
        label = f"<AuthorSummary author={self.author}>"
        return label
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
_STATUS_LETTERS = frozenset("AMDRC")
_GIT_DATE_FORMAT = "%a %b %d %H:%M:%S %Y %z"
# Shortest token accepted as a commit hash; keeps ordinary message text such
# as "commit fixes" from being mistaken for a commit header.
_MIN_HASH_LENGTH = 7


def _commit_id_from_header(line):
    """Return the hash of a ``commit <hash> ...`` header line, else ``None``."""
    tokens = line.split()
    if len(tokens) < 2 or tokens[0] != "commit":
        return None
    candidate = tokens[1]
    if len(candidate) >= _MIN_HASH_LENGTH and all(ch in _HEX_DIGITS for ch in candidate):
        return candidate
    return None


def _split_status_line(line):
    """Return ``(status, path)`` for a name-status entry, else ``None``."""
    # Prefer tab separation so that paths containing spaces stay intact; fall
    # back to any whitespace for input whose tabs were converted to spaces.
    raw_fields = line.split("\t") if "\t" in line else line.split()
    fields = [field.strip() for field in raw_fields if field.strip()]
    if len(fields) < 2:
        return None
    status = fields[0]
    # A status is one letter, optionally followed by a numeric score ("R100").
    if status[0] not in _STATUS_LETTERS or not (len(status) == 1 or status[1:].isdigit()):
        return None
    # Rename/copy entries list two paths; keep the last one.
    path = fields[-1] if status[0] in "RC" else fields[1]
    return status, path


def parse_git_log(lines):
    """Turn the lines of a ``git log --name-status`` dump into ``Commit`` objects.

    Args:
        lines: iterable of text lines (surrounding whitespace is ignored).

    Returns:
        A list of ``Commit`` objects in the order their headers appear.

    Raises:
        ValueError: if a ``Date:`` header does not match git's default date
            layout (``%a %b %d %H:%M:%S %Y %z``).

    Parsing rules:
        * ``commit <hash>`` starts a new commit wherever it appears, so a
          commit without a file list no longer swallows the next header.
          Only the hash is kept as the id; trailing decoration is ignored.
        * Lines before the first commit header are skipped.
        * ``Author:`` / ``Date:`` fill the current commit; ``Merge:`` is
          recognised and ignored.
        * The first other non-empty line is stored as the commit comment;
          later message lines never overwrite it.
        * After the comment, lines shaped like ``<status><TAB><path>`` are
          recorded via ``Commit.add_file_modification``; anything else is
          treated as message text and skipped.
        * Line counts are fetched with ``count_lines_modified`` only for 'A'
          and 'M' entries whose path ends with ``ONLY_COUNT_FILES_EXTENSIONS``.
    """
    commits = []
    commit = None
    message_seen = False  # True once the current commit's comment is stored

    for raw_line in lines:
        line = raw_line.strip()

        commit_id = _commit_id_from_header(line)
        if commit_id is not None:
            if commit is not None:
                commits.append(commit)
            commit = Commit(commit_id, "", None, "")
            message_seen = False
            continue

        if commit is None or not line:
            continue

        if not message_seen:
            if line.startswith("Author:"):
                commit.author = line[len("Author:"):].strip()
            elif line.startswith("Date:"):
                date_str = line[len("Date:"):].strip()
                commit.date = datetime.datetime.strptime(date_str, _GIT_DATE_FORMAT)
            elif line.startswith("Merge:"):
                pass  # parent hashes are not used by this tool
            else:
                commit.comment = line
                message_seen = True
            continue

        entry = _split_status_line(line)
        if entry is None:
            continue  # additional message text, not a file entry

        modification_type, filename = entry
        lines_modified = lines_added = lines_deleted = 0
        if filename.endswith(ONLY_COUNT_FILES_EXTENSIONS) and modification_type in ('M', 'A'):
            lines_modified, lines_added, lines_deleted = count_lines_modified(filename, commit.id)
        commit.add_file_modification(filename, modification_type, lines_modified, lines_added, lines_deleted)

    if commit is not None:
        commits.append(commit)
    return commits
def count_lines_modified(filename, commit_id):
    """Return ``(modified, added, deleted)`` line counts for one file in one commit.

    Runs ``git diff <commit_id>~1..<commit_id> --numstat -- <filename>`` through
    ``run_command`` and sums the added/deleted columns of every record.

    Args:
        filename: path passed to git after ``--``.
        commit_id: revision whose change against its first parent is measured.

    Returns:
        A tuple ``(lines_added + lines_deleted, lines_added, lines_deleted)``.

    Raises:
        Whatever ``run_command`` raises when the git invocation fails.
        NOTE(review): ``<commit_id>~1`` has no meaning for a root commit, so
        git is expected to fail there - confirm and handle if needed.
    """
    def _as_count(field):
        # git prints "-" instead of a number for binary files.
        return 0 if field == "-" else int(field)

    output = run_command(["git", "diff", f"{commit_id}~1..{commit_id}", "--numstat", "--", filename])
    lines_added = 0
    lines_deleted = 0
    for line in output.strip().splitlines():
        fields = line.strip().split("\t", 2)
        if len(fields) < 3:
            continue  # not an "added<TAB>deleted<TAB>path" record
        lines_added += _as_count(fields[0])
        lines_deleted += _as_count(fields[1])
    return lines_added + lines_deleted, lines_added, lines_deleted
def summarize_commits_by_author(commits):
    """Group commit counts per author by day, week start and month start.

    Every author seen gets an ``AuthorSummary`` entry; commits whose ``date``
    is ``None`` create the entry but are not counted in any bucket.

    Returns:
        A dict mapping each author string to its ``AuthorSummary``.
    """
    summaries = {}
    for entry in commits:
        summary = summaries.get(entry.author)
        if summary is None:
            summary = summaries[entry.author] = AuthorSummary(entry.author)

        if entry.date is None:
            continue

        commit_day = entry.date.date()
        # weekday() is 0 on Monday, so this lands on the Monday of that week.
        week_start = commit_day - datetime.timedelta(days=commit_day.weekday())
        month_start = commit_day.replace(day=1)

        for counters, key in (
            (summary.commits_by_day, commit_day),
            (summary.commits_by_week, week_start),
            (summary.commits_by_month, month_start),
        ):
            increment_count(counters, key)
    return summaries
def summarize_files_modified_by_commit(commits):
    """Print one line for every added or modified file of each commit.

    Added files are listed before modified ones; deleted files are not
    reported.
    """
    for entry in commits:
        touched = entry.added_files + entry.modified_files
        for change in touched:
            description = (
                f"Commit '{entry.id}': '{change.filename}'"
                f" was {change.modification_type}"
                f" with {change.lines_modified} lines modified"
            )
            print(description)
def increment_count(dictionary, key):
    """Add one to the counter stored under *key*, starting from zero if absent."""
    dictionary[key] = dictionary.get(key, 0) + 1
def run_command(command_args):
    """Run an external command and return its decoded, stripped standard output.

    Args:
        command_args: argument list handed to the process as-is (no shell).

    Returns:
        The command's stdout decoded to ``str`` with surrounding whitespace
        removed.

    Raises:
        RuntimeError: if the command exits with a non-zero status.  The
            message is the command's stderr, or a generic description when
            stderr is empty.

    Success is judged by the exit status, not by whether anything was written
    to stderr, so warnings printed by a command that succeeded do not abort
    the caller.
    """
    import subprocess

    completed = subprocess.run(command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if completed.returncode != 0:
        detail = completed.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            detail or f"command {command_args!r} exited with status {completed.returncode}"
        )
    return completed.stdout.decode().strip()
def main():
    """Read a git log dump, parse it and print one summary table per author.

    The log text comes from the file named by the single command-line
    argument, or from standard input when no such argument is given.  For
    each author a rich table is printed with one row per day holding the
    commit count and the file/line metrics from ``count_author_metrics``.
    """
    if len(sys.argv) == 2:
        # Context manager so the file handle is closed right after reading.
        with open(sys.argv[1], 'r') as log_file:
            git_log_output = log_file.read()
    else:
        print("Copy here a 'git log --name-status' output, redirect here a PIPE input or indicate a filepath when launching the script.")
        git_log_output = sys.stdin.read()

    commits = parse_git_log(git_log_output.splitlines())
    author_summaries = summarize_commits_by_author(commits)

    for author_summary in author_summaries.values():
        table = Table(show_header=True, header_style="bold magenta")
        table.add_column("Date", style="cyan")
        table.add_column("# Commits", justify="right", style="green")
        table.add_column("# Files Updated", justify="right", style="magenta")
        table.add_column("# Lines Modified", justify="right", style="cyan")
        table.add_column("# Lines Added", justify="right", style="blue")
        table.add_column("# Lines Deleted", justify="right", style="red")

        for date, num_commits in author_summary.commits_by_day.items():
            num_updated_files, num_modified_lines, num_added_lines, num_deleted_lines = count_author_metrics(date, author_summary, commits)
            table.add_row(
                date.strftime('%Y/%m/%d'),
                str(num_commits),
                str(num_updated_files),
                str(num_modified_lines),
                str(num_added_lines),
                str(num_deleted_lines),
            )

        console.print(f"\n[bold underline magenta]Author: {author_summary.author}[/bold underline magenta]")
        console.print(table)
def count_author_metrics(date, author_summary, commits):
    """Total the file and line metrics of one author's commits on one day.

    Args:
        date: calendar day compared against ``commit.date.date()``.
        author_summary: object whose ``author`` selects the commits to count.
        commits: iterable of commits to scan.

    Returns:
        A tuple ``(updated_files, modified_lines, added_lines, deleted_lines)``
        summed over the added, modified and deleted files whose names end
        with ``ONLY_COUNT_FILES_EXTENSIONS``.

    Commits whose ``date`` is ``None`` are skipped instead of raising, in
    line with how ``summarize_commits_by_author`` treats them.
    """
    num_updated_files = 0
    num_modified_lines = 0
    num_added_lines = 0
    num_deleted_lines = 0

    for commit in commits:
        if commit.author != author_summary.author:
            continue
        # An undated commit cannot belong to any calendar day.
        if commit.date is None or commit.date.date() != date:
            continue
        for file_modification in commit.added_files + commit.modified_files + commit.deleted_files:
            if not file_modification.filename.endswith(ONLY_COUNT_FILES_EXTENSIONS):
                continue
            num_updated_files += 1
            num_modified_lines += file_modification.lines_modified
            num_added_lines += file_modification.lines_added
            num_deleted_lines += file_modification.lines_deleted

    return num_updated_files, num_modified_lines, num_added_lines, num_deleted_lines
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment