Skip to content

Instantly share code, notes, and snippets.

@slavailn
Created April 26, 2016 06:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slavailn/821198792bf86ac58f539cba8ca6e9d2 to your computer and use it in GitHub Desktop.
Save slavailn/821198792bf86ac58f539cba8ca6e9d2 to your computer and use it in GitHub Desktop.
#! /usr/bin/perl
use strict; use warnings;
# Get length distribution of sequences for any
# bioperl compatible format. Prints to STDOUT
use Bio::SeqIO;
use Number::Range;
use Getopt::Long;
my $file; # input file
my $format; #file format
my $help;
my $min; # minimum length
my $max; # maximum length
my $range; # stores range of sequence lengths to be analyzed
my @nt_len; # intermediate array for hash initialization
my %nt_len; # initiate hash to store sequence length counts
if ( @ARGV != 8 || defined($help) )
{
die <<END;
##########################################
This script outputs length distribution
for any bioperl compatible format that
contains primary sequence
##########################################
USAGE:
perl length_distr.pl --in-file <file_name> --format <file_format> --min <INT> --max <INT>
END
}
GetOptions(
'in-file=s' => \$file,
'format=s' => \$format,
'min=i' => \$min,
'max=i' => \$max,
'help' => \$help,
) or die "Incorrect usage!\n$help\n";
my $inseq = Bio::SeqIO->new(-file => "<$file",
-format => $format,);
$range = Number::Range->new( "$min..$max" );
my @numbers = $range->range;
foreach my $nt ( @numbers )
{
push (@nt_len, $nt);
push (@nt_len, 0);
}
%nt_len = @nt_len;
while ( my $seq = $inseq->next_seq )
{
my $length = $seq->length;
$nt_len{$length}++;
}
foreach my $key ( sort {$a<=>$b} keys(%nt_len) )
{
print "$key\t$nt_len{$key}\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment