Created
June 17, 2012 20:17
-
-
Save indygreg/2945604 to your computer and use it in GitHub Desktop.
Find oldest lines in git repository
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# This script parses Git blame's "porcelain" output format and | |
# ascertains the oldest lines of code seen. | |
# | |
# If you want to perform a custom report, just define your own callback | |
# function and invoke parse_porcelain() with it. | |
# | |
# The expected input format is slightly modified from raw `git blame | |
# -p`. Here is an example script for producing input: | |
# | |
# for f in `git ls-tree -r --name-only HEAD`; do \ | |
# echo "BEGIN_RECORD $f"; \ | |
# git blame -l -t -M -C -n -w -p $f; \ | |
# echo "END_RECORD $f"; \ | |
# done | |
use strict; | |
use warnings FATAL => "all"; | |
use POSIX qw(strftime); | |
our @STATES = qw(global header_first header_metadata); | |
our $RE_BEGIN_RECORD = qr/^BEGIN_RECORD\s(.*)$/msx; | |
our $RE_END_RECORD = qr/^END_RECORD\s(.*)$/msx; | |
our $RE_LINE_HEADER = qr/ | |
^ | |
([a-z0-9]{40}) # SHA | |
\s(\d+) # Original line number | |
\s(\d+) # Current line number | |
(?:\s(\d+))? # Number of lines in group (optional) | |
$/msx; | |
our $RE_HEADER_METADATA = qr/^([a-z-]+)\s(.*)$/msx; | |
our $RE_LINE_DATA = qr/^\t(.*)$/msx; | |
# Parses Git blame's porcelain output. | |
# Calls the supplied $onBlock callback function when a full block of | |
# code has been parsed. The function receives a hashref describing the | |
# block. | |
sub parse_porcelain { | |
my ($fh, $onBlock) = @_; | |
my $state = "global"; | |
my $metadata = {}; | |
my @lines; | |
my ($commit, $original_line, $current_line); | |
my $current_file; | |
my $callOnBlock = sub { | |
my $data = {}; | |
$data->{'filename'} = $current_file; | |
$data->{'lines'} = \@lines; | |
$data->{'metadata'} = $metadata; | |
$data->{'commit'} = $commit; | |
&$onBlock($data); | |
@lines = (); | |
}; | |
while (my $line = <$fh>) { | |
chomp $line; | |
if ($line =~ $RE_BEGIN_RECORD) { | |
$state eq "global" or die "Parser error. Unexpected BEGIN_RECORD."; | |
$current_file = $1; | |
$state = "header_first"; | |
next; | |
} | |
elsif ($line =~ $RE_END_RECORD) { | |
$1 eq $current_file or die "Parser error. END_RECORD mismatch!"; | |
if ($onBlock and scalar(@lines)) { | |
&$callOnBlock(); | |
} | |
$state = "global"; | |
next; | |
} | |
if ($state eq "header_first") { | |
$line =~ $RE_LINE_HEADER or die "Invalid initial header line! $line"; | |
my ($new_commit, $new_original_line, $new_current_line, $block_count); | |
($new_commit, $new_original_line, $new_current_line, $block_count) = ($1, $2, $3, $4); | |
if ($block_count and $onBlock and scalar(@lines)) { | |
&$callOnBlock(); | |
} | |
$commit = $new_commit; | |
$original_line = $new_original_line; | |
$current_line = $new_current_line; | |
$state = "header_metadata"; | |
next; | |
} | |
if ($state eq "header_metadata") { | |
# Lines beginning with a tab denote line content. Subsequent line(s) | |
# will be metadata for that line. | |
if ($line =~ $RE_LINE_DATA) { | |
my $content = $1; | |
push @lines, [$content, $original_line, $current_line]; | |
$state = "header_first"; | |
next; | |
} | |
next if $line eq "boundary"; | |
$line =~ $RE_HEADER_METADATA or die "Could not parse header metadata."; | |
my ($k, $v) = ($1, $2); | |
$metadata->{$k} = $v; | |
next; | |
} | |
die "Unknown state!"; | |
} | |
} | |
# onBlock callback that collects oldest commit times for blocks. | |
my $old_lines = {}; | |
sub collect_times { | |
my ($data) = @_; | |
# We filter non-relevant lines. | |
my $have_content = 0; | |
foreach my $line (@{$data->{'lines'}}) { | |
my $s = $line->[0]; | |
# Skip empty and whitespace. | |
next if $s =~ m/^\s*$/; | |
# Skip things looking like comments. | |
next if $s =~ m/^\s*(#|\/\/|\/\*|\*\/)/; | |
if ($s =~ m/[a-z0-9]/) { | |
$have_content = 1; | |
last; | |
} | |
} | |
if (!$have_content) { | |
return; | |
} | |
my $time = $data->{'metadata'}->{'committer-time'}; | |
my $metadata = {}; | |
$metadata->{'commit'} = $data->{'commit'}; | |
$metadata->{'author'} = $data->{'metadata'}->{'author'}; | |
$metadata->{'filename'} = $data->{'filename'}; | |
$metadata->{'lines'} = []; | |
foreach my $line (@{$data->{'lines'}}) { | |
push @{$metadata->{'lines'}}, $line->[2]; | |
} | |
push @{$old_lines->{$time}}, $metadata; | |
} | |
sub print_oldest_blocks { | |
my ($times) = @_; | |
foreach my $time (sort { $a <=> $b } keys %$times) { | |
my $blocks = $times->{$time}; | |
my $date = strftime("%Y-%m-%d %H:%M:%S", gmtime($time)); | |
print "Time: $time ($date)\n"; | |
foreach my $data (@$blocks) { | |
print " Commit: " . $data->{'commit'} . "\n"; | |
print " Author: " . $data->{'author'} . "\n"; | |
print " Filename: " . $data->{'filename'} . "\n"; | |
print " Lines: " . join(', ', @{$data->{'lines'}}) . "\n"; | |
} | |
} | |
} | |
parse_porcelain(*STDIN, \&collect_times); | |
print_oldest_blocks($old_lines); |
Just what I've been looking for! :D Here is my wrapper with a bit more informative output:
#!/bin/sh
num_files=`git ls-tree -r --name-only HEAD | wc -l`
i=0
for f in `git ls-tree -r --name-only HEAD`; do \
i=$((i+1))
printf "%d/%d %s\r" $i $num_files $f > /dev/tty
echo "BEGIN_RECORD $f"; \
git blame -l -t -M -C -n -w -p $f; \
echo "END_RECORD $f"; \
done | perl ~/bin/git-find-oldest-lines.pl
also added a --no-pager
to git --no-pager blame -l -t -M -C -n -w -p $f;
cant u use git --no-pager blame -ltMCnwp $f
?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage
Put this in some other file, like
find-oldest-line.sh