Skip to content

Instantly share code, notes, and snippets.

@audy
Created June 3, 2011 18:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save audy/1006830 to your computer and use it in GitHub Desktop.
Save audy/1006830 to your computer and use it in GitHub Desktop.
trim2.pl (QSEQ)
# !/usr/bin/perl -w
#
# Trim2_Illumina.pl
# Written by: David B. Crabb
# Eric Triplett's Group
# University of Florida
# Last Modified: November 30, 2010
###################################################
#
# Parameters:
# -a raw illumina input file read 1
# -b raw illumina input file read 2
#
###################################################
$LENGTH_CUTOFF = 70;
$QUALITY_CUTOFF = 20;
$TRUNCATE = 11;
# If Length < 70; throw out.
# If throw out 1; then throw out both.
# Delete first 11 on both sides
# Number of reads in each barcode
# Number thrown out
use Getopt::Std;
use File::Basename;
# PARSE ARGUMENTS
my %parameters;
getopts( 'a:b:', \%parameters ); #Takes parameters
unless ( $parameters{a} && $parameters{b} ) {
print "Usage: perl trim2.pl
-a raw illumina input file read 1
-b raw illumina input file read 2\n";
exit;
}
$read1 = $parameters{a};
$read2 = $parameters{b};
unless ( open( READ1, "$read1" ) ) {
print "Unable to open $read1\n";
exit;
}
unless ( open( READ2, "$read2" ) ) {
print "Unable to open $read2\n";
exit;
}
@split_out1 = split( /\./, $prefix );
$out_dir = dirname($read1);
$prefix = basename($read1);
# Make Singletons Directory
system("mkdir -p $out_dir/singletons/");
open SINGLETONS, ">$out_dir/singletons/$prefix" . "_trim_single.txt" or die $!;
while ( $read1_line = <READ1> ) {
chomp($read1_line);
chomp( $read2_line = <READ2> );
@line1 = split( /\t/, $read1_line );
@line2 = split( /\t/, $read2_line );
$line1[8] =~ s/\./N/g;
$line2[8] =~ s/\./N/g;
$line1[8] = trim( $line1[8], $line1[9] );
$line2[8] = trim( $line2[8], $line2[9] );
$line1[9] = "";
$line2[9] = "";
next if ( ( $line1[8] eq "0" ) && ( $line2[8] eq "0" ) );
# Print Singletons
if ( $line1[8] eq "0" ) {
print SINGLETONS join( "\t", @line2 ) . "\n";
}
elsif ( $line2[8] eq "0" ) {
print SINGLETONS join( "\t", @line1 ) . "\n";
}
# Or, print entire thing (interleaved QSEQ)
else {
# Print in QSEQ format, uncomment (and comment FASTQ line)
# print join( "\t", @line1 ) . "\n" . join( "\t", @line2 ) . "\n";
# Print in FASTQ (interleaved) format:
# TODO shorten this!
$header = join(':', @line1[0..7]);
$sequence_quality = @line1[8];
@sq = split( /\t/, $sequence_quality);
$sequence = @sq[0];
$quality = @sq[1];
print "\@$header:A\n$sequence\n\+$header:A\n$quality\n";
$header = join(':', @line2[0..7]);
$sequence_quality = @line2[8];
@sq = split( /\t/, $sequence_quality);
$sequence = @sq[0];
$quality = @sq[1];
print "\@$header:B\n$sequence\n\+$header:B\n$quality\n";
}
}
close SINGLE1;
close SINGLE2;
# trim($seq, $qual);
sub trim() {
$seq = shift;
$qual = shift;
#trim off primer
$seq = substr( $seq, $TRUNCATE );
$qual = substr( $qual, $TRUNCATE );
$max = 0;
$sum = 0;
$first = 0;
$start = 0;
$end = 0;
@qualities = split( //, $qual );
$size = scalar @qualities;
for ( $a = 0 ; $a < $size ; $a++ ) {
$sum += ( ord( $qualities[$a] ) - 64 - $QUALITY_CUTOFF );
if ( $sum > $max ) {
$max = $sum;
$end = $a;
$start = $first;
}
if ( $sum < 0 ) {
$sum = 0;
$first = $a;
}
}
$seq = substr( $seq, $start, $end - $start );
return 0 if ( length($seq) < $LENGTH_CUTOFF );
$seq = $seq . "\t" . substr( $qual, $start, $end - $start );
return $seq;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment