Created
August 19, 2014 16:11
-
-
Save pcantalupo/9c30709fe802c96ea2b3 to your computer and use it in GitHub Desktop.
transform colorspace fastq to sanger fastq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Based on scripts and discussion found at http://seqanswers.com/forums/showthread.php?t=1425
# and https://www.biostars.org/p/43855/
# This script converts the colour-space sequence on the 2nd line of each FASTQ
# record to base space. It does not convert the quality string, since SOLiD
# quality values are already in Sanger format. The first base and the first
# quality value (both belonging to the primer) are discarded.
use strict; | |
use warnings; | |
# Main loop: read four-line FASTQ records from the files named on the command
# line (or from STDIN), convert the colour-space sequence line of each record
# to base space with csfa2fa(), drop the first quality character, and print
# the resulting record to STDOUT.
RECORD:
while ( defined( my $id1 = <> ) ) {
    chomp $id1;

    # Tolerate blank lines between records and at the end of the input
    # instead of treating them as the start of a record.
    next RECORD unless $id1 =~ /\S/;

    # Read the remaining three lines one at a time and stop at the first
    # missing one: once <> has reported end of input, a further <> would
    # start reading STDIN rather than keep returning undef.
    my @rest;
    for my $part ( 'sequence', 'separator', 'quality' ) {
        my $line = <>;
        defined $line
            or die "Truncated FASTQ record '$id1': missing $part line\n";
        chomp $line;
        push @rest, $line;
    }
    my ( $csfa, $id2, $qual ) = @rest;

    my $fa = csfa2fa($csfa);

    # Drop the first quality value so the quality string lines up with the
    # decoded sequence, which no longer contains the primer base. Guard the
    # empty string, for which substr() would warn and return undef.
    my $trimmed_qual = length $qual ? substr( $qual, 1 ) : "";

    print join( "\n", $id1, $fa, $id2, $trimmed_qual ), "\n";
}
# csfa2fa($seq)
#
# Decode one colour-space read into base space.
#
# $seq is the sequence line of a colour-space FASTQ record: a leading primer
# base followed by one colour call per position ('0'-'3', or '.' for a
# missing call). Each colour, combined with the previously decoded base,
# determines the next base.
#
# Returns the decoded bases as a string with the primer base removed, so the
# result is one character shorter than $seq. An empty or undefined $seq
# yields the empty string. Any base/colour pair that is not in the table
# decodes to "N", and every base after an "N" is also "N", so the result
# always keeps its expected length.
sub csfa2fa {
    my ($seq) = @_;
    return "" unless defined $seq && length $seq;

    # Key: previous base followed by colour call; value: the next base.
    my %cs = (
        "T0" => "T", "T1" => "G", "T2" => "C", "T3" => "A", "T." => "N",
        "C0" => "C", "C1" => "A", "C2" => "T", "C3" => "G", "C." => "N",
        "G0" => "G", "G1" => "T", "G2" => "A", "G3" => "C", "G." => "N",
        "A0" => "A", "A1" => "C", "A2" => "G", "A3" => "T", "A." => "N",
        "N0" => "N", "N1" => "N", "N2" => "N", "N3" => "N", "N." => "N",
    );

    # The first character is the primer base; it seeds the decoding but is
    # not part of the returned sequence.
    my ( $previous, @colours ) = split //, $seq;

    my @bases;
    for my $colour (@colours) {
        my $encoding = $previous . $colour;

        # Fall back to "N" for unexpected characters rather than storing
        # undef, which would warn and silently shorten the output.
        $previous = exists $cs{$encoding} ? $cs{$encoding} : "N";
        push @bases, $previous;
    }

    return join "", @bases;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It's not a real problem, just a Perl warning, because the script declares the variable $qual twice.
To fix it, rename the first $qual on line 23 to something else, such as $qualnew.
You also need to change $qual in the print join() call on line 25 to the new name.
this is a nice script that helps me a lot!