dansmith01/peptide_rollup.pl

## peptide_rollup.pl
#!/usr/bin/perl

#--------------------------------------------------------------------------#
# Description: Combines multiple peptide readings into protein-            #
#              centric data                                                #
#                                                                          #
# Author:      Daniel Patrick Smith                                        #
# Version:     2.0 / November 29th, 2011                                   #
# Affiliation: Giovannoni Laboratory, Oregon State University              #
# License:     GNU General Public License v3                               #
#                                                                          #
# Reference:   Smith, D.P. et al. Proteomic and Transcriptomic Analysis of #
#              Candidatus Pelagibacter ubique Describes the First PII-     #
#              Independent Response to Nitrogen Limitation in a Free-      #
#              Living Alphaproteobacterium. In preparation.                #
#--------------------------------------------------------------------------#

use strict;
use warnings;

use Statistics::Distributions;


#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Define Data File and Treatment Groups ( --> EDIT THIS SECTION <-- )
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

my $file = "Peptides.txt";

# Data File - tab-delimited peptide readings.
# 1st column is the peptide sequence (ignored)
# 2nd column is the protein ID
# Remaining columns are abundance measurements for each sample
# 1st row contains sample IDs for each sample
# Technical replicates should be given the same sample ID

# Example (partial) data file:
# Peptide           Protein    SL-1   SL-1   SL-1   SL-2   SL-2   SL-2
# -.MKMFYEKD.A      C134_0001  NA     NA     NA     22.38  22.02  22.26
# K.ARDLALSYASA.I   C134_0001  20.85  21.75  21.87  21.07  22.00  21.74
# K.DADVDLIK.S      C134_0001  24.52  23.25  24.60  24.09  NA     24.55
# K.DLADHPIEK.V     C134_0001  24.66  26.44  25.98  26.19  26.54  26.01
# K.DLDVFMVAPK.G    C134_0001  24.20  23.81  24.27  24.27  22.54  24.57
# K.DSGAKEVVVALR.D  C134_0001  23.62  23.25  23.68  23.31  23.80  23.36
# K.EVLADIQSGK.F    C134_0001  NA     21.32  23.36  NA     NA     22.55

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

my %sets = ( "Le" => ["SL-2", "SL-3", "SL-5"],
             "Lt" => ["SL-6", "SL-8", "SL-9", "SL-10", "SL-11", "SL-12"],
             "Ls" => ["SL-20", "SL-21", "SL-22", "SL-23", "SL-24"],
             "Ce" => ["SL-1", "SL-4", "SL-7", "SL-14"],
             "Ct" => ["SL-15", "SL-16", "SL-17", "SL-25"],
             "Cs" => ["SL-26", "SL-27", "SL-28", "SL-29"]
           );

# Sets are used to group biological replicates together into treatments.
# Above, "Le" and "Cs" represent "Limited exponential" and "control
# stationary", respectively. However, any treatment identifier may be used.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

my @compare = (["Ce", "Le"], ["Ce","Ls"], ["Ce", "Cs"], ["Cs", "Ls"]);

# These are the treatments that should be compared. E.g., the first pair,
# ["Ce", "Le"] will compute the fold change in expression between those
# two treatments by dividing the abundance in one by the other: Le / Ce

#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#                 --> DO NOT EDIT BELOW THIS LINE <--
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-


# Read in peptide abundances from user-specified file
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
my @raw_data = ();
open(FILE, $file) or die ("Can't open file '$file': $!\n");

my $line = <FILE>;
chomp $line;
my @headers = split("\t", $line);

while ($line = <FILE>) {
  chomp $line;
	my @f = split("\t", $line);

	my $pep_readings = { 'protein' => $f[1] };

	push @{$pep_readings->{$headers[$_]}}, $f[$_] foreach (2..$#f);
	push @raw_data, $pep_readings;
}
close FILE;


# Average peptides together by treatments, set undef if >=50% are NA
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
my @avg_data = ();
my $prot2idx = {};
foreach my $raw (@raw_data) {
	my $new = {};
	$new->{'protein'} = $raw->{'protein'};

	while (my ($cond, $SL_ID_arr) = each %sets) {
		my @vals = ();
		push @vals, @{$raw->{$_}} foreach (@{$SL_ID_arr});
		@vals = grep(/^[\d\.]+$/, @vals);

		if (scalar(@vals) < 3) {
			$new->{$cond} = undef;
			$raw->{$cond} = undef;
		} else {
			$new->{$cond} = 2 ** avg(@vals);
			$raw->{$cond} = [@vals];
		}
	}

	push @{$prot2idx->{$raw->{'protein'}}}, scalar(@avg_data);
	push @avg_data, $new;
}


# Output headers
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
print "Protein  ";
foreach my $cond_pair (@compare) {
	my ($ref_condition, $exp_condition) = @{$cond_pair};
	print "\t". $ref_condition ."->". $exp_condition ."\tP-Value\t";
}
print "\n";


# Calculate fold changes in protein abundances
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
my @protlist = sort {$a cmp $b} (keys %{$prot2idx});
foreach my $protein (@protlist) {

	print $protein;

	# Iterate over each pair of treatments specified by the user
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	foreach my $cond_pair (@compare) {
		my ($ref_condition, $exp_condition) = @{$cond_pair};

		my @ratios = ();
		my @ttests = ();
		my $expONLY = 0;
		my $refONLY = 0;
		my $fold_change = undef;
		my $p_value = 0;


		# Loop over each peptide for the current protein
		#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
		foreach my $idx (@{$prot2idx->{$protein}}) {

			my $ref_val = $avg_data[$idx]->{$ref_condition};
			my $ref_arr = $raw_data[$idx]->{$ref_condition};

			my $exp_val = $avg_data[$idx]->{$exp_condition};
			my $exp_arr = $raw_data[$idx]->{$exp_condition};

			# If the peptide was detected in both treatments,
			# calculate the ratio (fold change).
			#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
			if ($ref_val && $exp_val) {
				if ($exp_val < $ref_val) {
					($exp_arr, $ref_arr) = ($ref_arr, $exp_arr);
				}
				push @ratios, log10($exp_val / $ref_val);
				push @ttests, ttest($exp_arr, $ref_arr);
			}

			# Keep track of peptides only occuring in one treatment
			#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
			elsif ($ref_val) { $refONLY++; }
			elsif ($exp_val) { $expONLY++; }
		}


		# Loop over each peptide for the current protein
		#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
		if (@ratios) {

			# Set p-value to 1 where the ratio is opposite the average
			#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
			my $avg_ratio = avg(@ratios);
			foreach (0..$#ratios) {
				$ttests[$_] = 1 if ($ratios[$_] > 0 xor $avg_ratio > 0);
			}

			$p_value     = $ttests[0];
			$fold_change = sprintf("%.2f", (10 ** $avg_ratio));

			if (scalar(@ttests) > 1) {

				# Bonferroni correction
				#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
				foreach (0..$#ttests) {
					$ttests[$_] = $ttests[$_] * scalar(@ttests);
					$ttests[$_] = 1 if ($ttests[$_] > 1);
				}

				# Combine multiple p-values into one
				#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
				my $df      = 2 * scalar(@ttests);
				my $chi_sq  = 0;
				   $chi_sq += log($_) foreach (@ttests);
				   $chi_sq *= -2;
				$p_value = Statistics::Distributions::chisqrprob($df,$chi_sq);
			}

		}

		# Report instances where the peptides for a given protein were
		# only detected in one treatment
		#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
		elsif ($expONLY > $refONLY && $expONLY >= 3) {
			$fold_change = "INF";
			$p_value     = "0*";
		}
		elsif ($expONLY < $refONLY && $refONLY >= 3) {
			$fold_change = "0";
			$p_value     = "0*";
		}

		# Also report instances where the peptides for a given protein were
		# never detected in either treatment
		#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
		else {
			$fold_change = "ND";
			$p_value     = "1*";
		}

		print "\t$fold_change\t$p_value";
	}

	print "\n";
}


# Two-tailed Student's t-test calculation
# Statistical Sleuth, 2nd ed. pg 44
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
sub ttest {
	my @i_arr = @{$_[0]};
	my @j_arr = @{$_[1]};
	my $i_num = scalar(@i_arr) || return 1;
	my $j_num = scalar(@j_arr) || return 0;
	my $i_df  = ($i_num - 1)   || return 1;
	my $j_df  = ($j_num - 1)   || return 0;
	my $i_ave = 0; $i_ave += $_ foreach (@i_arr); $i_ave /= $i_num;
	my $j_ave = 0; $j_ave += $_ foreach (@j_arr); $j_ave /= $j_num;
	return 1 unless ($i_ave > $j_ave);

	my $i_sd  = 0; $i_sd += ($_ - $i_ave) ** 2 foreach (@i_arr);
	   $i_sd  = sqrt($i_sd / $i_df);
	my $j_sd  = 0; $j_sd += ($_ - $j_ave) ** 2 foreach (@j_arr);
	   $j_sd  = sqrt($j_sd / $j_df);

	my $pooled_df = $i_df + $j_df;
	my $pooled_sd = ($i_df * ($i_sd ** 2)) + ($j_df * ($j_sd ** 2));
	   $pooled_sd = sqrt($pooled_sd / $pooled_df);
	my $pooled_se = $pooled_sd * sqrt((1 / $i_num) + (1 / $j_num));

	return 1 unless ($pooled_se != 0);
	my $t_statistic = ($i_ave - $j_ave) / $pooled_se;
	my $p_value = Statistics::Distributions::tprob($pooled_df, $t_statistic);

	return $p_value;
}


# Helper Functions
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
sub avg {
	my $sum = 0;
	   $sum += $_ foreach (@_);
	my $avg = scalar(@_) ? $sum / scalar(@_) : 0;
	return $avg;
}
sub log10 {
	my $n = shift;
	return log($n)/log(10);
}
	#!/usr/bin/perl

	#--------------------------------------------------------------------------#
	# Description: Combines multiple peptide readings into protein- #
	# centric data #
	# #
	# Author: Daniel Patrick Smith #
	# Version: 2.0 / November 29th, 2011 #
	# Affiliation: Giovannoni Laboratory, Oregon State University #
	# License: GNU General Public License v3 #
	# #
	# Reference: Smith, D.P. et al. Proteomic and Transcriptomic Analysis of #
	# Candidatus Pelagibacter ubique Describes the First PII- #
	# Independent Response to Nitrogen Limitation in a Free- #
	# Living Alphaproteobacterium. In preparation. #
	#--------------------------------------------------------------------------#

	use strict;
	use warnings;

	use Statistics::Distributions;



	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	# Define Data File and Treatment Groups ( --> EDIT THIS SECTION <-- )
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

	my $file = "Peptides.txt";

	# Data File - tab-delimited peptide readings.
	# 1st column is the peptide sequence (ignored)
	# 2nd column is the protein ID
	# Remaining columns are abundance measurements for each sample
	# 1st row contains sample IDs for each sample
	# Technical replicates should be given the same sample ID

	# Example (partial) data file:
	# Peptide Protein SL-1 SL-1 SL-1 SL-2 SL-2 SL-2
	# -.MKMFYEKD.A C134_0001 NA NA NA 22.38 22.02 22.26
	# K.ARDLALSYASA.I C134_0001 20.85 21.75 21.87 21.07 22.00 21.74
	# K.DADVDLIK.S C134_0001 24.52 23.25 24.60 24.09 NA 24.55
	# K.DLADHPIEK.V C134_0001 24.66 26.44 25.98 26.19 26.54 26.01
	# K.DLDVFMVAPK.G C134_0001 24.20 23.81 24.27 24.27 22.54 24.57
	# K.DSGAKEVVVALR.D C134_0001 23.62 23.25 23.68 23.31 23.80 23.36
	# K.EVLADIQSGK.F C134_0001 NA 21.32 23.36 NA NA 22.55

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

	my %sets = ( "Le" => ["SL-2", "SL-3", "SL-5"],
	"Lt" => ["SL-6", "SL-8", "SL-9", "SL-10", "SL-11", "SL-12"],
	"Ls" => ["SL-20", "SL-21", "SL-22", "SL-23", "SL-24"],
	"Ce" => ["SL-1", "SL-4", "SL-7", "SL-14"],
	"Ct" => ["SL-15", "SL-16", "SL-17", "SL-25"],
	"Cs" => ["SL-26", "SL-27", "SL-28", "SL-29"]
	);

	# Sets are used to group biological replicates together into treatments.
	# Above, "Le" and "Cs" represent "Limited exponential" and "control
	# stationary", respectively. However, any treatment identifier may be used.

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

	my @compare = (["Ce", "Le"], ["Ce","Ls"], ["Ce", "Cs"], ["Cs", "Ls"]);

	# These are the treatments that should be compared. E.g., the first pair,
	# ["Ce", "Le"] will compute the fold change in expression between those
	# two treatments by dividing the abundance in one by the other: Le / Ce

	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	# --> DO NOT EDIT BELOW THIS LINE <--
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-




	# Read in peptide abundances from user-specified file
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	my @raw_data = ();
	open(FILE, $file) or die ("Can't open file '$file': $!\n");

	my $line = <FILE>;
	chomp $line;
	my @headers = split("\t", $line);

	while ($line = <FILE>) {
	chomp $line;
	my @f = split("\t", $line);

	my $pep_readings = { 'protein' => $f[1] };

	push @{$pep_readings->{$headers[$_]}}, $f[$_] foreach (2..$#f);
	push @raw_data, $pep_readings;
	}
	close FILE;


	# Average peptides together by treatments, set undef if >=50% are NA
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	my @avg_data = ();
	my $prot2idx = {};
	foreach my $raw (@raw_data) {
	my $new = {};
	$new->{'protein'} = $raw->{'protein'};

	while (my ($cond, $SL_ID_arr) = each %sets) {
	my @vals = ();
	push @vals, @{$raw->{$_}} foreach (@{$SL_ID_arr});
	@vals = grep(/^[\d\.]+$/, @vals);

	if (scalar(@vals) < 3) {
	$new->{$cond} = undef;
	$raw->{$cond} = undef;
	} else {
	$new->{$cond} = 2 ** avg(@vals);
	$raw->{$cond} = [@vals];
	}
	}

	push @{$prot2idx->{$raw->{'protein'}}}, scalar(@avg_data);
	push @avg_data, $new;
	}


	# Output headers
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	print "Protein ";
	foreach my $cond_pair (@compare) {
	my ($ref_condition, $exp_condition) = @{$cond_pair};
	print "\t". $ref_condition ."->". $exp_condition ."\tP-Value\t";
	}
	print "\n";


	# Calculate fold changes in protein abundances
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	my @protlist = sort {$a cmp $b} (keys %{$prot2idx});
	foreach my $protein (@protlist) {

	print $protein;

	# Iterate over each pair of treatments specified by the user
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	foreach my $cond_pair (@compare) {
	my ($ref_condition, $exp_condition) = @{$cond_pair};

	my @ratios = ();
	my @ttests = ();
	my $expONLY = 0;
	my $refONLY = 0;
	my $fold_change = undef;
	my $p_value = 0;


	# Loop over each peptide for the current protein
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	foreach my $idx (@{$prot2idx->{$protein}}) {

	my $ref_val = $avg_data[$idx]->{$ref_condition};
	my $ref_arr = $raw_data[$idx]->{$ref_condition};

	my $exp_val = $avg_data[$idx]->{$exp_condition};
	my $exp_arr = $raw_data[$idx]->{$exp_condition};

	# If the peptide was detected in both treatments,
	# calculate the ratio (fold change).
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	if ($ref_val && $exp_val) {
	if ($exp_val < $ref_val) {
	($exp_arr, $ref_arr) = ($ref_arr, $exp_arr);
	}
	push @ratios, log10($exp_val / $ref_val);
	push @ttests, ttest($exp_arr, $ref_arr);
	}

	# Keep track of peptides only occuring in one treatment
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	elsif ($ref_val) { $refONLY++; }
	elsif ($exp_val) { $expONLY++; }
	}


	# Loop over each peptide for the current protein
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	if (@ratios) {

	# Set p-value to 1 where the ratio is opposite the average
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	my $avg_ratio = avg(@ratios);
	foreach (0..$#ratios) {
	$ttests[$_] = 1 if ($ratios[$_] > 0 xor $avg_ratio > 0);
	}

	$p_value = $ttests[0];
	$fold_change = sprintf("%.2f", (10 ** $avg_ratio));

	if (scalar(@ttests) > 1) {

	# Bonferroni correction
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	foreach (0..$#ttests) {
	$ttests[$_] = $ttests[$_] * scalar(@ttests);
	$ttests[$_] = 1 if ($ttests[$_] > 1);
	}

	# Combine multiple p-values into one
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	my $df = 2 * scalar(@ttests);
	my $chi_sq = 0;
	$chi_sq += log($_) foreach (@ttests);
	$chi_sq *= -2;
	$p_value = Statistics::Distributions::chisqrprob($df,$chi_sq);
	}

	}

	# Report instances where the peptides for a given protein were
	# only detected in one treatment
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	elsif ($expONLY > $refONLY && $expONLY >= 3) {
	$fold_change = "INF";
	$p_value = "0*";
	}
	elsif ($expONLY < $refONLY && $refONLY >= 3) {
	$fold_change = "0";
	$p_value = "0*";
	}

	# Also report instances where the peptides for a given protein were
	# never detected in either treatment
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	else {
	$fold_change = "ND";
	$p_value = "1*";
	}

	print "\t$fold_change\t$p_value";
	}

	print "\n";
	}



	# Two-tailed Student's t-test calculation
	# Statistical Sleuth, 2nd ed. pg 44
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	sub ttest {
	my @i_arr = @{$_[0]};
	my @j_arr = @{$_[1]};
	my $i_num = scalar(@i_arr) \|\| return 1;
	my $j_num = scalar(@j_arr) \|\| return 0;
	my $i_df = ($i_num - 1) \|\| return 1;
	my $j_df = ($j_num - 1) \|\| return 0;
	my $i_ave = 0; $i_ave += $_ foreach (@i_arr); $i_ave /= $i_num;
	my $j_ave = 0; $j_ave += $_ foreach (@j_arr); $j_ave /= $j_num;
	return 1 unless ($i_ave > $j_ave);

	my $i_sd = 0; $i_sd += ($_ - $i_ave) ** 2 foreach (@i_arr);
	$i_sd = sqrt($i_sd / $i_df);
	my $j_sd = 0; $j_sd += ($_ - $j_ave) ** 2 foreach (@j_arr);
	$j_sd = sqrt($j_sd / $j_df);

	my $pooled_df = $i_df + $j_df;
	my $pooled_sd = ($i_df * ($i_sd ** 2)) + ($j_df * ($j_sd ** 2));
	$pooled_sd = sqrt($pooled_sd / $pooled_df);
	my $pooled_se = $pooled_sd * sqrt((1 / $i_num) + (1 / $j_num));

	return 1 unless ($pooled_se != 0);
	my $t_statistic = ($i_ave - $j_ave) / $pooled_se;
	my $p_value = Statistics::Distributions::tprob($pooled_df, $t_statistic);

	return $p_value;
	}




	# Helper Functions
	#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
	sub avg {
	my $sum = 0;
	$sum += $_ foreach (@_);
	my $avg = scalar(@_) ? $sum / scalar(@_) : 0;
	return $avg;
	}
	sub log10 {
	my $n = shift;
	return log($n)/log(10);
	}