Skip to content

Instantly share code, notes, and snippets.

@slavailn
Created April 26, 2016 06:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slavailn/1d0e9343fc07efdc662685f02ace8307 to your computer and use it in GitHub Desktop.
Save slavailn/1d0e9343fc07efdc662685f02ace8307 to your computer and use it in GitHub Desktop.
#! /usr/bin/perl
# This script will extract a subset of fasta or fastq reads
# in a specified size range and save them to file
use strict; use warnings;
use Bio::SeqIO;
use Number::Range;
use Getopt::Long;
# Command-line options. All five of --in-file, --out-file, --format, --min
# and --max are required; --help prints the usage text and exits.
my $infile;     # --in-file:  path of the sequence file to read
my $outfile;    # --out-file: path of the file that receives the kept records
my $format;     # --format:   format name handed to Bio::SeqIO for both files
my $help;       # --help:     true when the usage text was requested
my $min;        # --min:      smallest sequence length to keep (inclusive)
my $max;        # --max:      largest sequence length to keep (inclusive)

# Usage text, shown for --help and appended to every usage error.
my $usage = <<'END';
####################################################
This script copies the sequences whose length lies
between --min and --max (inclusive) from the input
file to the output file, using the given format for
both files
####################################################
USAGE:
perl filter_by_length.pl --in-file <file_name> --out-file <file_name> --format <file_format> --min <INT> --max <INT>
perl filter_by_length.pl --help
END

# Parse first, validate afterwards. Counting raw @ARGV entries before parsing
# would reject legitimate spellings such as "--min=20" and could never see
# the --help flag, because the flag is only set by GetOptions itself.
GetOptions(
    'in-file=s'  => \$infile,
    'out-file=s' => \$outfile,
    'format=s'   => \$format,
    'min=i'      => \$min,
    'max=i'      => \$max,
    'help'       => \$help,
) or die "Incorrect usage!\n$usage";

# Asking for help is not an error: print to STDOUT and exit successfully.
if ($help) {
    print $usage;
    exit 0;
}

# The script takes no positional arguments; anything GetOptions left behind
# is most likely a typo, so refuse it rather than silently ignoring it.
die "Unexpected argument(s): @ARGV\n$usage" if @ARGV;

# Every option other than --help is mandatory. Report all missing ones at once.
my %value_for = (
    'in-file'  => $infile,
    'out-file' => $outfile,
    'format'   => $format,
    'min'      => $min,
    'max'      => $max,
);
my @missing = grep { !defined $value_for{$_} || $value_for{$_} eq '' }
    qw(in-file out-file format min max);
if (@missing) {
    die 'Missing required option(s): '
        . join( ', ', map {"--$_"} @missing )
        . "\n$usage";
}

# A negative or inverted range can never match a sequence; fail loudly
# instead of producing an empty output file.
die "--min must not be negative (got $min)\n$usage" if $min < 0;
die "--max ($max) must not be smaller than --min ($min)\n$usage"
    if $max < $min;
# Reader over the input file and writer over the output file. Both are
# created with the same --format value.
my $reader = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => $format,
);
my $writer = Bio::SeqIO->new(
    -file   => ">$outfile",
    -format => $format,
);

# Walk the input one record at a time and pass along only those records
# whose length falls inside [$min, $max], both bounds included.
while ( my $record = $reader->next_seq ) {
    my $seq_len = $record->length;

    next if $seq_len < $min;    # too short
    next if $seq_len > $max;    # too long

    $writer->write_seq($record);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment