Created
June 6, 2017 20:11
-
-
Save laceysanderson/12b1de6784413cd69cbb064666063b08 to your computer and use it in GitHub Desktop.
Prefix all records names in a FASTA file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
use strict;
use warnings;

# Purpose: Prefix all record names in a FASTA file.
#
# Arguments:
#   1. Input FASTA file
#   2. Output FASTA file
#   3. Prefix that you would like added to all the FASTA records.
#
# Each header line (">ID description") is rewritten as ">PREFIXID description";
# every other line is copied through unchanged. As a sanity check, the number
# of lines and headers processed is compared against counts obtained
# independently from the external `wc` and `grep` tools.
#
# Exit status: 0 on success, 1 if a count mismatch was reported. Usage and
# I/O failures die().

# Run an external counting command WITHOUT going through a shell (list-form
# pipe open), so file names containing spaces or shell metacharacters are
# passed through verbatim. Returns the leading integer of the command's
# output, or undef if the command could not be run or printed no number.
sub external_count {
    my @command = @_;

    open(my $pipe, '-|', @command) or return;
    my $output = do { local $/; <$pipe> };
    # The exit status is deliberately ignored: `grep -c` exits non-zero when
    # there are zero matches but still prints a valid count of 0.
    close($pipe);

    return unless defined $output && $output =~ /\A\s*(\d+)/;
    return $1;
}

# Without all three arguments the external tools would be called with no file
# name and block waiting on standard input.
die "Usage: $0 <input FASTA> <output FASTA> <prefix>\n" unless @ARGV >= 3;

my $filein  = $ARGV[0];
my $fileout = $ARGV[1];
my $prefix  = $ARGV[2];

print "\nInput File: ".$filein."\n";
print "Output File: ".$fileout."\n";
print "Prefix: ".$prefix."\n";

# Open the input first so a missing/unreadable file is reported by us rather
# than by wc/grep.
open(my $in, '<', $filein) || die "Error: $!\n";

# Opening the output for writing truncates it, so refuse to continue if it is
# the very same file (same device + inode) as the input.
if (-e $fileout) {
    my @in_id  = (stat $in)[0, 1];
    my @out_id = (stat $fileout)[0, 1];
    if (@in_id && @out_id && "@in_id" eq "@out_id") {
        die "Error: input and output are the same file; refusing to overwrite the input.\n";
    }
}

# Determine filesize, etc. to help with reporting of progress.
# `wc -l` counts newline characters; `grep -c ">"` counts lines containing '>'.
my $wclines = external_count('wc', '-l', '--', $filein);
my $grepc   = external_count('grep', '-c', '-e', '>', '--', $filein);

my $wclines_report = defined $wclines ? $wclines : 'unknown';
my $grepc_report   = defined $grepc   ? $grepc   : 'unknown';
print "\nExpected:\n - $wclines_report lines\n - $grepc_report headers\n";

open(my $out, '>', $fileout) || die "Error: $!\n";

# For each line of the input file...
my $lines    = 0;    # records read (a final unterminated line counts too)
my $newlines = 0;    # newline-terminated records, i.e. what `wc -l` counts
my $headers  = 0;
while (my $line = <$in>) {
    $lines++;
    $newlines++ if $line =~ /\n\z/;

    if ($line =~ /^>(\S+)\s*(.*)/) {
        my ($id, $description) = ($1, $2);
        $headers++;
        # Only add the separating space when there is a description, so
        # headers consisting of just an ID get no trailing whitespace.
        my $suffix = length($description) ? ' '.$description : '';
        print {$out} ">".$prefix.$id.$suffix."\n";
    }
    # This is not a FASTA header so just print it out.
    else {
        print {$out} $line;
    }
}

print "\nProcessed:\n - $lines lines\n - $headers headers\n";

# Basic error reporting.
my $mismatch = 0;

# Compare newline-terminated lines with wc's count so that an input file
# lacking a final newline does not trigger a spurious error.
if (defined $wclines && $wclines != $newlines) {
    print "ERROR: the number of LINES processed differs from the number of lines expected ($lines versus $wclines)\n";
    $mismatch = 1;
}
if (defined $grepc && $grepc != $headers) {
    print "ERROR: the number of FASTA HEADERS processed differs from the number of headers expected ($headers versus $grepc)\n";
    $mismatch = 1;
}

close($in);
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out) || die "Error: $!\n";

exit($mismatch ? 1 : 0);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment