dmoney/columnStats.pl

## columnStats.pl
#!/usr/bin/perl
use strict;
use warnings;

# columnStats.pl
# A script to count lines of comma-separated input, grouped by some column value.
#
# Author: Dustin King (cathodion@gmail.com)
#
# Usage:  perl columnStats.pl [COLNUM]
# COLNUM defaults to 0, and is zero-based.
#
# Example input (from STDIN):
#    a
#    b,c
#    a,b
#
#  Example output:
#    a	2
#    b	1


# Zero-based number of the column on which to run stats (first command-line
# argument; defaults to 0 when omitted).
my $colnum = (defined $ARGV[0]) ? $ARGV[0] : 0;

# Reject anything that is not a non-negative integer: a non-numeric value
# would be treated as 0 (with warnings), and a negative value would index
# fields from the end of the line instead of naming a column.
die "Usage: perl columnStats.pl [COLNUM]  (COLNUM must be a non-negative integer)\n"
	unless $colnum =~ /\A[0-9]+\z/;

# a hash where:
#   key = contents of the column
#   value = occurrences of those contents
my %stats = %{ count_column_values(\*STDIN, $colnum) };

# print the stats, one "key<TAB>count" line per key, sorted by key
foreach my $key (sort keys %stats){
	print $key, "\t", $stats{$key}, "\n";
}

# count_column_values($fh, $column)
#
# Reads comma-separated lines from the filehandle $fh and counts them,
# grouped by the contents of the zero-based column $column.
#
# Parameters:
#   $fh     - readable filehandle supplying the input lines
#   $column - zero-based, non-negative index of the column to group by
#
# Returns a reference to a hash mapping each distinct column value to the
# number of lines it appeared on.  Two special keys are used:
#   "<BLANK>"            - lines that are empty after the newline is removed
#   "<TOO_FEW_COLUMNS>"  - lines with fewer than $column + 1 fields
sub count_column_values {
	my ($fh, $column) = @_;
	my %count_of;

	# Read one line at a time so large inputs are never held in memory whole.
	while (my $line = <$fh>) {
		chomp $line;

		# count blank lines
		if ($line eq "") {
			$count_of{"<BLANK>"}++;
			next;
		}

		# A negative LIMIT makes split keep trailing empty fields, so an empty
		# last column (e.g. "a,") is counted under the empty key exactly like
		# an empty leading or middle column, rather than being misreported as
		# a missing column.
		my @fields = split(/,/, $line, -1);

		# count lines where specified column doesn't exist
		if ($#fields < $column) {
			$count_of{"<TOO_FEW_COLUMNS>"}++;
			next;
		}

		# count lines by column contents
		$count_of{ $fields[$column] }++;
	}

	return \%count_of;
}
	#!/usr/bin/perl
	use strict;
	use warnings;

	# columnStats.pl
	# A script to count lines of comma-separated input, grouped by some column value.
	#
	# Author: Dustin King (cathodion@gmail.com)
	#
	# Usage: perl columnStats.pl [COLNUM]
	# COLNUM defaults to 0, and is zero-based.
	#
	# Example input (from STDIN):
	# a
	# b,c
	# a,b
	#
	# Example output (key and count are separated by a tab):
	#    a	2
	#    b	1


	# Zero-based number of the column on which to run stats (first command-line
	# argument; defaults to 0 when omitted).
	my $colnum = (defined $ARGV[0]) ? $ARGV[0] : 0;

	# Reject anything that is not a non-negative integer: a non-numeric value
	# would be treated as 0 (with warnings), and a negative value would index
	# fields from the end of the line instead of naming a column.
	die "Usage: perl columnStats.pl [COLNUM]  (COLNUM must be a non-negative integer)\n"
		unless $colnum =~ /\A[0-9]+\z/;

	# a hash where:
	# key = contents of the column
	# value = occurrences of those contents
	# Special keys: "<BLANK>" for empty lines and "<TOO_FEW_COLUMNS>" for lines
	# that have fewer than $colnum + 1 fields.
	my %stats;

	# Read one line at a time so large inputs are never held in memory whole.
	while (my $line = <STDIN>) {
		chomp $line;

		# count blank lines
		if ($line eq "") {
			$stats{"<BLANK>"}++;
			next;
		}

		# A negative LIMIT makes split keep trailing empty fields, so an empty
		# last column (e.g. "a,") is counted under the empty key exactly like
		# an empty leading or middle column, rather than being misreported as
		# a missing column.
		my @fields = split(/,/, $line, -1);

		# count lines where specified column doesn't exist
		if ($#fields < $colnum) {
			$stats{"<TOO_FEW_COLUMNS>"}++;
			next;
		}

		# count lines by column contents
		$stats{ $fields[$colnum] }++;
	}

	# print the stats, one "key<TAB>count" line per key, sorted by key
	foreach my $key (sort keys %stats){
		print $key, "\t", $stats{$key}, "\n";
	}