Skip to content

Instantly share code, notes, and snippets.

@iklobato
Created March 21, 2024 19:33
Show Gist options
  • Save iklobato/8cd844b8fd5f9d680bb25cc34c83434b to your computer and use it in GitHub Desktop.
Save iklobato/8cd844b8fd5f9d680bb25cc34c83434b to your computer and use it in GitHub Desktop.
This script generates a time tracking report by parsing Git commit messages for the current user over a specified date range. It extracts JIRA card numbers and associated commit messages, groups them by date, and outputs a formatted table showing the work done each day.
#
# pip install tabulate==0.9.0
#
#
import argparse
import datetime
import logging
import os
import re
from dataclasses import dataclass
import subprocess
from tabulate import tabulate
# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and its level, emitted through a StreamHandler.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()],
    level=logging.INFO,
)
@dataclass
class GitLogParser:
    """Build a per-day time tracking report from Git commit subjects.

    Commits are read with ``git log --all`` in the current working directory,
    one calendar day at a time. Only commits whose author name contains
    ``git_username`` (case-insensitive) are kept, and only those whose subject
    mentions at least one key matching ``jira_pattern``.
    """

    # Directory containing this script; not read by any method of this class.
    script_dir: str = os.path.dirname(__file__)
    # Regular expression matching card keys such as "ABC-123".
    jira_pattern: str = r'\b[A-Z]+-\d+\b'
    # Author name to filter on; looked up via `git config user.name` when None.
    git_username: str = None
    # Number of calendar days before today at which the report starts.
    last_days: int = 30

    def __post_init__(self):
        """Fill in ``git_username`` from the Git configuration if not supplied."""
        # Only auto-detect when the caller did not pass a name explicitly.
        if self.git_username is None:
            self.git_username = self.get_git_user()
        if not self.git_username:
            # An empty name is a substring of every author name, so no commit
            # is filtered out in parse_git_log.
            logging.warning("No Git user name found; commits from every author will be included")

    @staticmethod
    def get_git_user():
        """Return the output of ``git config user.name``, stripped.

        Returns:
            The configured name (possibly an empty string), or None when the
            command could not be executed.
        """
        command = ['git', 'config', 'user.name']
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except Exception as e:
            logging.error(f"Error executing Git command: {e}")
            return None
        return result.stdout.strip()

    @staticmethod
    def get_git_log_for_date(date):
        """Return the ``git log`` lines for commits made on ``date``.

        Args:
            date: value interpolated into the ``--after``/``--before`` filters
                (``run`` passes ``datetime.date`` objects).

        Returns:
            A list of ``hash|author|date|subject`` strings, one per commit.

        Raises:
            Exception: re-raised unchanged if the Git command cannot be run.
        """
        # An argument list (no shell) keeps the '%' placeholders and embedded
        # spaces from being interpreted by a shell.
        command = [
            'git', 'log', '--all',
            '--format=%h|%an|%cd|%s',
            '--date=format-local:%Y-%m-%d %H:%M:%S',
            f'--after={date} 00:00',
            # Include the whole final minute of the day.
            f'--before={date} 23:59:59',
        ]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except Exception as e:
            logging.critical(f"Error executing Git command: {e}")
            raise
        return result.stdout.splitlines()

    @staticmethod
    def get_business_days(start_date, end_date):
        """Return every Monday-to-Friday date from ``start_date`` to ``end_date`` inclusive."""
        business_days = []
        current_date = start_date
        while current_date <= end_date:
            if current_date.weekday() not in (5, 6):  # 5 and 6 are Saturday and Sunday
                business_days.append(current_date)
            current_date += datetime.timedelta(days=1)
        return business_days

    def parse_git_log(self, log):
        """Group the current user's commit subjects by card key.

        Args:
            log: iterable of ``hash|author|date|subject`` lines; empty lines
                are ignored.

        Returns:
            Dict mapping each card key to the subjects that mention it (with
            the key removed), joined by ``' / '``.
        """
        pattern = re.compile(self.jira_pattern)
        # A missing user name behaves like an empty one instead of crashing.
        username = (self.git_username or '').lower()
        messages = {}
        for line in log:
            if not line:
                continue
            # Split on the first three separators only: the subject is the
            # last field and may itself contain '|'.
            fields = line.split('|', 3)
            if len(fields) != 4:
                logging.warning(f"Skipping malformed Git log line: {line!r}")
                continue
            _commit_hash, commit_owner, _commit_date, commit_message = fields
            if username not in commit_owner.lower():
                continue
            # dict.fromkeys de-duplicates while keeping first-seen order, so a
            # key repeated within one subject records the message only once.
            for card in dict.fromkeys(pattern.findall(commit_message)):
                messages.setdefault(card, []).append(commit_message.replace(card, '').strip())
        return {card: ' / '.join(parts) for card, parts in messages.items()}

    def grouped_messages(self, start_date, today):
        """Collect ``"<card> <messages>"`` entries for each day in the range.

        Args:
            start_date: first date to inspect (inclusive).
            today: last date to inspect (inclusive).

        Returns:
            Dict mapping a date to its list of entries; dates without any
            matching commit are absent.
        """
        one_day = datetime.timedelta(days=1)
        grouped = {}
        current_date = start_date
        while current_date <= today:
            parsed = self.parse_git_log(self.get_git_log_for_date(current_date))
            if parsed:
                grouped[current_date] = [f"{card} {message}" for card, message in parsed.items()]
            current_date += one_day
        return grouped

    def run(self):
        """Print the report table for the last ``last_days`` days up to today."""
        today = datetime.date.today()
        start_date = today - datetime.timedelta(days=self.last_days)
        business_days = self.get_business_days(start_date, today)
        logging.info(f"Getting logs from {start_date} to {today} ({len(business_days)} business days)")
        grouped_messages = self.grouped_messages(start_date, today)
        table_data = [
            [date.strftime('%Y-%m-%d'), date.strftime('%A'), ' / '.join(messages)]
            for date, messages in grouped_messages.items()
        ]
        headers = ["Date", "Day of Week", "Time Track Message"]
        # Business days without commits still get an (empty) row so gaps are
        # visible; weekend days appear only when they have commits.
        missing_days = [date for date in business_days if date not in grouped_messages]
        table_data.extend([[date.strftime('%Y-%m-%d'), date.strftime('%A'), ''] for date in missing_days])
        # ISO-formatted date strings sort chronologically.
        table_data.sort(key=lambda row: row[0])
        print(tabulate(table_data, headers=headers, tablefmt="rounded_grid"))
def parse_date(date_string):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Raises:
        ValueError: if ``date_string`` does not match the ``%Y-%m-%d`` format.
    """
    parsed = datetime.datetime.strptime(date_string, '%Y-%m-%d')
    return parsed.date()
def parse_arguments():
    """Parse the command-line options of the report script.

    Returns:
        argparse.Namespace with ``project_dir`` (required string),
        ``last_days`` (int, default 30) and ``start`` (the value produced by
        ``parse_date``, or None when the option is omitted).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--project-dir', required=True,
                            help='Directory to change to before executing the script')
    # The look-back window is counted in calendar days, and the script entry
    # point uses it only when no explicit start date is supplied.
    arg_parser.add_argument('--last-days', required=False, default=30, type=int,
                            help='Number of calendar days to go back in history (ignored when --start is given)')
    arg_parser.add_argument('-s', '--start', type=parse_date, help='Set a start date (YYYY-MM-DD)')
    return arg_parser.parse_args()
if __name__ == "__main__":
    args = parse_arguments()
    # Git commands run in the current working directory, so switch to the
    # requested repository first.
    os.chdir(args.project_dir)
    # An explicit start date takes precedence over --last-days; it is
    # converted into the number of days between that date and today.
    if args.start:
        days_back = (datetime.date.today() - args.start).days
    else:
        days_back = args.last_days
    parser = GitLogParser(last_days=days_back)
    parser.run()
@iklobato
Copy link
Author

Here are some usage examples for the Git log parsing script:

  1. Parse Git log for a specific repository directory:
python git_log_parser.py --project-dir /path/to/repo

This will parse the Git log for the repository located at /path/to/repo and generate a time tracking report for the last 30 days.

  2. Parse Git log starting from a specific date:
python git_log_parser.py --project-dir /path/to/repo --start 2023-01-01

This will parse the Git log starting from January 1, 2023 up until today for the repository at /path/to/repo.

  3. Parse Git log for a custom number of days:
python git_log_parser.py --project-dir /path/to/repo --last-days 60 

This will parse the Git log for the last 60 days instead of the default 30 days.

  4. Redirect the script output to a file:
python git_log_parser.py --project-dir /path/to/repo > time_report.txt

This will save the generated time tracking report to a file named time_report.txt.

  5. Use the script in a Git alias:
[alias]
    timereport = !python /path/to/git_log_parser.py --project-dir . 

Adding this to your .gitconfig file allows running the script directly from Git using git timereport.

The key points are:

  • Always specify the repository directory with --project-dir
  • Optionally set a custom start date with --start in 'YYYY-MM-DD' format
  • Optionally set a custom number of days to look back with --last-days
  • The script output can be redirected to a file
  • The script can be set up as a Git alias for convenience

This provides flexibility in generating time tracking reports for different repositories and date ranges. The script encourages writing descriptive commit messages that mention JIRA ticket numbers (for example, "ABC-123 Fix login redirect"), since only commits containing a ticket number appear in the report.

Citations:
[1] https://www.nushell.sh/cookbook/parsing_git_log.html
[2] https://github.com/gaborantal/git-log-parser
[3] https://stackoverflow.com/questions/6327276/parsing-git-log-output-preferably-as-xml
[4] https://github.com/sbaltes/git-log-parser
[5] https://git-scm.com/docs/git-log
[6] https://git-scm.com/book/en/v2/Git-Basics-Viewing-the-Commit-History
[7] https://www.npmjs.com/package/git-log-parser
[8] http://chrisparnin.github.io/articles/2013/09/parse-git-log-output-in-c/
[9] https://opensource.com/article/20/1/bash-scripts-git
[10] https://www.reddit.com/r/git/comments/19fb4ri/parse_git_log_p_into_a_gui_such_as_a_difftool/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment