Created
April 26, 2016 06:01
-
-
Save slavailn/1d0e9343fc07efdc662685f02ace8307 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl
# This script extracts the subset of reads whose length lies within an
# inclusive [--min, --max] range and saves them to a file.  Records are read
# and written through Bio::SeqIO using the same --format for input and output
# (e.g. fasta or fastq).
use strict; use warnings;
use Bio::SeqIO;
use Number::Range;    # NOTE(review): not referenced in this script; kept so existing setups are unaffected
use Getopt::Long;

my $infile;     # input file
my $outfile;    # output file
my $format;     # file format (passed to Bio::SeqIO for both reading and writing)
my $help;       # print help
my $min;        # minimum length (inclusive)
my $max;        # maximum length (inclusive)

# Usage text shared by --help and by every argument error.
my $usage = <<'END';
####################################################
This script writes out the sequences whose length is
within the specified range, in the same format as the
input, for any format supported by Bio::SeqIO
####################################################
USAGE:
perl filter_by_length.pl --in-file <file_name> --out-file <file_name> --format <file_format> --min <INT> --max <INT>
END

# Parse the options BEFORE validating them: counting raw @ARGV entries would
# reject legitimate spellings such as --min=18, and $help can only be set
# once GetOptions has run.
GetOptions(
    'in-file=s'  => \$infile,
    'out-file=s' => \$outfile,
    'format=s'   => \$format,
    'min=i'      => \$min,
    'max=i'      => \$max,
    'help'       => \$help,
) or die "Incorrect usage!\n$usage";

# An explicit request for help is not an error.
if ($help) {
    print $usage;
    exit 0;
}

# Anything GetOptions left behind is a stray positional argument.
die "Unexpected argument(s): @ARGV\n$usage" if @ARGV;

# All five value options are required.
my %value_of = (
    'in-file'  => $infile,
    'out-file' => $outfile,
    'format'   => $format,
    'min'      => $min,
    'max'      => $max,
);
my @missing = grep { !defined $value_of{$_} || !length $value_of{$_} }
    qw(in-file out-file format min max);
if (@missing) {
    die 'Missing required option(s): '
        . join( ', ', map {"--$_"} @missing )
        . "\n$usage";
}

# Sanity-check the length range so an impossible range fails loudly instead
# of silently producing an empty output file.
die "--min and --max must not be negative\n" if $min < 0 || $max < 0;
die "--min ($min) must not be greater than --max ($max)\n" if $min > $max;

# The output is opened for writing (truncating) below, so reading from and
# writing to the same path would destroy the input.
die "--in-file and --out-file must be different files\n"
    if $infile eq $outfile;
die "Input file '$infile' does not exist or is not readable\n"
    unless -r $infile;

# Input stream.  The constructor result is checked in case it returns a
# false value instead of throwing (e.g. for an unsupported format).
my $in_seq = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => $format,
) or die "Could not open '$infile' for reading as '$format'\n";

# Output stream, written in the same format as the input.
my $seq_out = Bio::SeqIO->new(
    -file   => ">$outfile",
    -format => $format,
) or die "Could not open '$outfile' for writing as '$format'\n";

# Copy every record whose length falls inside the inclusive range.
while ( my $seq = $in_seq->next_seq ) {
    my $length = $seq->length;
    next if $length < $min || $length > $max;
    $seq_out->write_seq($seq);
}

# Close both streams explicitly so buffered output is flushed before exit.
$in_seq->close;
$seq_out->close;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment