Created
April 26, 2016 05:54
-
-
Save slavailn/e67c602ba74c69b54919a1c93e0dff2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl
# Extract FASTA entries whose ids contain any of a list of user-supplied
# patterns.
#
# USAGE: <script> fastaFile queryFile
#
#   fastaFile - FASTA file; an entry id is everything after '>' on a
#               header line.
#   queryFile - one pattern per line.
#
# Patterns are matched as literal, case-sensitive substrings of the id
# (regex metacharacters have no special meaning). Every matching entry is
# written to STDOUT exactly once, in the order it appears in the FASTA
# file, as a header line followed by its sequence joined onto one line.
use strict; use warnings;

# Run only when executed as a script, so the helper subs below can be
# loaded and exercised on their own.
main(@ARGV) unless caller;

# Entry point: takes the command-line arguments (fasta file, query file),
# dies with a usage message when either is missing or cannot be opened,
# and prints the matching entries to STDOUT.
sub main
{
    my @args = @_;
    my $fasta_file = shift @args
        or die "Please provide fasta file\n USAGE: $0 fastaFile queryFile\n";
    my $query_file = shift @args
        or die "Please provide file with patterns to be matched\n USAGE: $0 fastaFile queryFile\n";

    open( my $fasta_fh, "<", $fasta_file )
        or die "Cannot open fasta file: $!\n";
    open( my $pattern_fh, "<", $query_file )
        or die "Cannot open file with patterns: $!\n";

    my ( $id2seq, $order ) = read_fasta($fasta_fh);
    my $patterns = read_patterns($pattern_fh);

    close $fasta_fh;
    close $pattern_fh;

    for my $id ( matching_ids( $order, $patterns ) )
    {
        print ">$id\n";
        print $id2seq->{$id}, "\n";
    }
    return;
}

# Read FASTA records from the filehandle $fh.
# Returns two references: a hash mapping id => concatenated sequence, and
# an array of the ids in order of first appearance.
# Lines seen before the first header are ignored (there is no entry to
# attach them to). Sequence lines of a repeated id are appended to the
# sequence already stored for that id.
sub read_fasta
{
    my ($fh) = @_;
    my %id2seq;
    my @order;
    my $id;

    while ( my $line = <$fh> )
    {
        # Strip both Unix and Windows line endings.
        $line =~ s/[\r\n]+\z//;
        if ( $line =~ m/^>(.+)/ )
        {
            $id = $1;
            if ( !exists $id2seq{$id} )
            {
                # Register the entry right away so that a header without
                # any sequence lines is still known.
                $id2seq{$id} = '';
                push @order, $id;
            }
        }
        elsif ( defined $id )
        {
            $id2seq{$id} .= $line;
        }
    }
    return ( \%id2seq, \@order );
}

# Read one pattern per line from the filehandle $fh.
# Returns a reference to an array of patterns in file order, without
# blank lines (an empty pattern would match every id) and without
# duplicates.
sub read_patterns
{
    my ($fh) = @_;
    my @patterns;
    my %seen;

    while ( my $pattern = <$fh> )
    {
        $pattern =~ s/[\r\n]+\z//;
        next if $pattern eq '';
        next if $seen{$pattern}++;
        push @patterns, $pattern;
    }
    return \@patterns;
}

# Given a reference to an array of ids and a reference to an array of
# patterns, return the list of ids that contain at least one pattern as a
# literal substring. Input order is preserved and each id is returned at
# most once, however many patterns it matches.
sub matching_ids
{
    my ( $ids, $patterns ) = @_;
    my @hits;

    ID:
    for my $id ( @{$ids} )
    {
        for my $pattern ( @{$patterns} )
        {
            next if $pattern eq '';
            if ( index( $id, $pattern ) >= 0 )
            {
                push @hits, $id;
                next ID;    # one hit is enough; avoid duplicate output
            }
        }
    }
    return @hits;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment