public
Last active

a script that collects statistics on perl code to aid in prioritizing while refactoring

  • Download Gist
_output
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>perl code_statistics.pl
 
### Collecting block statistics
 
 
### Average Block Length
18.5932553337922
 
### Average Block Size
670.863041982106
 
### Average Block Depth
2.39917412250516
 
### Top Ten Longest Blocks
File Line Lines Size Depth Dev.
================================================================================
lib/Greenphyl/Load/Family.pm 57 294 16570 1 15.81
lib/Greenphyl/Load/Uniprot.pm 103 287 11540 1 15.44
cgi-bin/update_pipeline.cgi 121 260 7608 1 13.98
lib/Greenphyl/Load/Family.pm 79 260 15030 2 13.98
cgi-bin/job_process.cgi 46 250 10726 1 13.45
lib/Greenphyl/Load/Family.pm 91 247 14549 3 13.28
lib/Greenphyl/Load/Interproscan.pm 33 236 10354 1 12.69
lib/Greenphyl/Load/Family.pm 120 217 13306 4 11.67
lib/Greenphyl/Load/Family.pm 134 202 12659 5 10.86
lib/Greenphyl/Load/Family.pm 140 195 12400 6 10.49
lib/Greenphyl/Load/Go.pm 27 195 8422 1 10.49
 
### Top Ten Biggest Blocks
File Line Lines Size Depth Dev.
================================================================================
lib/Greenphyl/Load/Family.pm 57 294 16570 1 24.70
lib/Greenphyl/Load/Family.pm 79 260 15030 2 22.40
lib/Greenphyl/Load/Family.pm 91 247 14549 3 21.69
lib/Greenphyl/Load/Family.pm 120 217 13306 4 19.83
lib/Greenphyl/Load/Family.pm 134 202 12659 5 18.87
lib/Greenphyl/Load/Family.pm 140 195 12400 6 18.48
lib/Greenphyl/Load/Family.pm 145 189 12138 7 18.09
lib/Greenphyl/Load/Uniprot.pm 103 287 11540 1 17.20
cgi-bin/job_process.cgi 46 250 10726 1 15.99
lib/Greenphyl/Load/Interproscan.pm 33 236 10354 1 15.43
cgi-bin/job_process.cgi 98 191 8454 2 12.60
 
### Top Ten Deepest Located Blocks
File Line Lines Size Depth Dev.
================================================================================
lib/Greenphyl/Load/Family.pm 189 8 639 12 5.00
lib/Greenphyl/Load/Family.pm 242 8 617 12 5.00
lib/Greenphyl/Load/Family.pm 182 5 203 12 5.00
lib/Greenphyl/Load/Family.pm 167 31 2073 11 4.58
lib/Greenphyl/Load/Family.pm 238 13 954 11 4.58
lib/Greenphyl/Load/Go.pm 104 5 330 11 4.58
lib/Greenphyl/Load/Uniprot.pm 523 5 301 11 4.58
lib/Greenphyl/Load/Family.pm 234 3 166 11 4.58
lib/Greenphyl/Load/Family.pm 272 55 3317 10 4.17
lib/Greenphyl/Load/Family.pm 164 35 2243 10 4.17
lib/Greenphyl/Load/Family.pm 228 24 1685 10 4.17
 
D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>
code_statistics.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
#!/usr/bin/perl
 
use strict;
use warnings;
 
use feature 'say';
 
use File::Find::Rule;
use File::Grep qw( fgrep fmap fdo );
use PPI;
use Smart::Comments;
use List::Util qw( reduce );
use HTML::Template;
use Term::ProgressBar::Simple;
 
my @ignore_list = qw(
 
 
site/lib/Perl/APIReference site/lib/Bio/MolEvol/CodonModel.pm
Syntax/Highlight/Engine/Kate site/lib/ExtUtils/XSpp/Grammar.pm
);
 
$|++;
 
run();
 
exit;
 
sub run
{
my @files = grep is_not_ignored(), find_code_files();
 
### Collecting block statistics
my $progress = Term::ProgressBar::Simple->new( progress_config( count => scalar(@files) ) );
my @blocks = map find_subs( $_, sub { $progress->increment; } ), @files;
 
### Average Block Length
my $avg_length = get_and_say_average( 'line_count', \@blocks );
 
### Average Block Size
my $avg_size = get_and_say_average( 'size', \@blocks );
 
### Average Block Depth
my $avg_depth = get_and_say_average( 'indent', \@blocks );
 
### Top Ten Longest Blocks
print_top_ten_table( 'line_count', $avg_length, \@blocks );
 
### Top Ten Biggest Blocks
print_top_ten_table( 'size', $avg_size, \@blocks );
 
### Top Ten Deepest Located Blocks
print_top_ten_table( 'indent', $avg_depth, \@blocks );
 
return;
}
 
sub find_code_files
{
return File::Find::Rule->file->name( '*.pm', '*.pl', '*.phtml', '*.cgi' )->in('.');
}
 
sub progress_config {
my %config = @_;
die "no count given" if !defined $config{count};
$config{ETA} ||= 'linear';
#$config{minor_char} ||= '';
$config{max_update_rate} ||= 0.1;
return \%config;
}
 
sub is_not_ignored {
my $ignore = join '|', @ignore_list;
return 1 if !$ignore;
 
return 0 if $_ =~ m/$ignore/;
return 1;
}
 
sub get_and_say_average {
my ( $column, $blocks ) = @_;
my $avg = 1 / @{$blocks} * reduce { $a + $b->{$column} } 0, @{$blocks};
say $avg;
return $avg;
}
 
sub print_top_ten_table {
my ( $column, $avg, $blocks ) = @_;
 
@{$blocks} = sort { $b->{$column} <=> $a->{$column} } @{$blocks};
print_statistics( $avg, $column, @{$blocks} );
 
return;
}
 
sub find_subs
{
my ( $file, $post_process_hook ) = @_;
 
my $code = PPI::Document->new($file) || return;
my $blocks = $code->find('PPI::Structure::Block');
$blocks ||= [];
 
my @block_data = map collect_block_data( $_, $file ), @{$blocks};
$post_process_hook->();
 
return @block_data;
}
 
sub collect_block_data {
my ( $block, $file ) = @_;
 
my @lines = split( '\n', $block->content );
my $location = $block->location;
my $size = length $block->content;
 
# blocks with only one line are probably just hash accessors and thus irrelevant
# however we want to keep huge examples of those around, just in case they're interesting
return if @lines == 1 and $size < 150;
 
my %block_data;
$block_data{size} = $size;
$block_data{line_count} = @lines;
$block_data{line} = $location->[0];
$block_data{file} = $file;
$block_data{indent} = first_child_indent( $block, $block_data{line} );
 
return \%block_data;
}
 
sub first_child_indent {
my ( $block, $first_line ) = @_;
 
my $first_child = $block->find_first( sub { is_indent_relevant_element( @_, $first_line ) } );
 
# blocks without a child give us the indentation of the first brace, which is not reliable
# so we null the indent to keep them out of the top ten
return 0 if !$first_child;
 
my $child_loc = $first_child->location;
my $child_indent = int( $child_loc->[1] / 4 );
 
return $child_indent;
}
 
=head2 is_indent_relevant_element
This is an element evaluation routine. It is used by the PPI element finder.
Its return codes mean:
- 1 : return this
- 0 : skip this, look at children
- undef : skip this, ignore children
 
It instructs the finder to return the first element inside a block that is
relevant in determining the indentation depth of a block.
 
This means it triggers either either on actual code on the first line; or
anything non-whitespace and non-comment on the following ones.
 
Brace stacking in the first line is ignored, as well as "sub {"-stacking.
 
It is slightly complicated by the fact that the location detection for
PPI::Statement objects is broken, which means their children have to be
examined.
 
=cut
 
sub is_indent_relevant_element {
my ( $parent, $element, $first_line ) = @_;
my $class = $element->class;
return 0 if $class eq 'PPI::Statement'; # location detection on these is broken,
return 0 if $class eq 'PPI::Statement::Compound'; # so we need to look at their children
 
# this is stuff we ignore on the first line
if( $element->location->[0] == $first_line ) {
return 0 if $class eq 'PPI::Structure::Block'; # brace stacking and "sub {"-stacking constructs need to be inspected
return undef if $class eq 'PPI::Token::Structure'; # the braces themselves are skipped
return undef if $class eq 'PPI::Token::Word' and $element->content eq 'sub'; # sub as well
}
 
return 1 if $element->significant; # this is an actual significant element, either on first line or later, we take it
 
return undef; # other sorts of children of the block get ignored, as well as their children
}
 
sub print_statistics {
my ( $avg, $column ) = ( shift, shift );
 
my $template = "A47 A7 A6 A7 A6 A7";
 
say pack( $template, ' File', ' Line', 'Lines', ' Size', 'Depth', ' Dev.' );
say pack( $template, ('==================================================') x 6 );
 
print_block_line( $_, $template, $avg, $column ) for @_[ 0 .. 10 ];
 
return;
}
 
sub print_block_line {
my ( $block, $template, $avg, $column ) = @_;
 
my $file = $block->{file};
my $line = sprintf( "%6d ", $block->{line} );
my $line_count = sprintf( "%5d ", $block->{line_count} );
my $size = sprintf( "%6d ", $block->{size} );
my $indent = sprintf( "%5d ", $block->{indent} );
my $deviation = sprintf( "%7.2f", $block->{$column} / $avg );
 
if ( length $file > 47 ) {
$file = substr( $file, -43, 43 );
$file = "...$file";
}
 
say pack( $template, $file, $line, $line_count, $size, $indent, $deviation );
 
return;
}
structure.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14
Code::Statistics
 
Code::Statistics::Target::Block
 
Code::Statistics::Metric::Indentation
Code::Statistics::Metric::Length
Code::Statistics::Metric::Size
 
Code::Statistics::View::HTML
Code::Statistics::View::Shell
 
cstat_collect.pl -> cstat.db
cstat.db -> cstat_html.pl
cstat.db -> cstat_shell.pl ? c_stat.pl

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.