Skip to content

Instantly share code, notes, and snippets.

@laurianesimon
Last active October 17, 2017 08:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save laurianesimon/a9fc44aa83305c576e914710cae75f87 to your computer and use it in GitHub Desktop.
Save laurianesimon/a9fc44aa83305c576e914710cae75f87 to your computer and use it in GitHub Desktop.
extract and count polymophisms ( use the Main script file)
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.4';
my $lastModif='2016-Aug-29';
my($limMin,$limSup,$softclip);
# set the options
&GetOptions("h|help" => \&help,
"minstart:i" => \$limMin, # beginning of the read must be <= to  limmin
"minend:i" => \$limSup, # end of the read must be >= limsup
"S|softclip" => \$softclip # activate the checkIfIsCoveringIntervalWithSEnd
);
@ARGV or &help;
# verification of the arg
(-e "$ARGV[0]") or &help2($ARGV[0]);
#########################################################################################################
# main #
#########################################################################################################
if(defined($softclip)){
&mainWithS($ARGV[0]); # analyze a file
}
else{
&main($ARGV[0]); # analyze a file
}
#########################################################################################################
# SUBS #
#########################################################################################################
sub main { # read the file
my $samfile = shift @_;
my ($totalreads,$coveringreads);
open (F1, $samfile) || die("opening of the file fails\n");
while (my$li=<F1>){
#next if ($li=~/^@/); # get ride of the header
chomp $li;
if ($li=~/^@/){
print "$li\n";
}
else{
$totalreads++;
my@tab=split(/\t/,$li);
my($read,$ref,$start,$cigar,$seq)=($tab[0],$tab[2],$tab[3],$tab[5],$tab[9]);
if(SamUtils::checkIfIsCoveringInterval($start,$cigar,$limMin,$limSup)){ # (<=$limMin && >=$limSup)
$coveringreads++;
print "$li\n";
}
}
}
close F1;
print STDERR "total processed reads : $totalreads\n";
print STDERR "total covering reads : $coveringreads\n";
}
#########################################################################################################
sub mainWithS { # reqd the file
my $samfile = shift @_;
my ($totalreads,$coveringreads);
open (F1, $samfile) || die("opening of the file fails\n");
while (my$li=<F1>){
#next if ($li=~/^@/); # get ride of the header
chomp $li;
if ($li=~/^@/){
print "$li\n";
}
else{
$totalreads++;
my@tab=split(/\t/,$li);
my($read,$ref,$start,$cigar,$seq)=($tab[0],$tab[2],$tab[3],$tab[5],$tab[9]);
if(SamUtils::checkIfIsCoveringIntervalWithSEnd($start,$cigar,$limMin,$limSup)){ # (<=$limMin && >=$limSup)
$coveringreads++;
print "$li\n";
}
}
}
close F1;
print STDERR "total processed reads : $totalreads\n";
print STDERR "total covering reads : $coveringreads\n";
}
#########################################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name [-h|--help] <sam file>\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.2';
my $lastModif='2016-Mar-04';
my $startref=0;
my $endref;
my $meandepth=1;
my $bruit=10;
# set the options
&GetOptions("h|help" => \&help,
"startref:i" => \$startref, # position of the beginning of the ref (after the N)
"endref:i" => \$endref, # position of the end of the ref (before the N)
"meandepth:i" => \$meandepth
);
@ARGV or &help;
# verification of the arg
(-e "$ARGV[0]") or &help2($ARGV[0]);
#########################################################################################################
# info #
#########################################################################################################
# count the polymorphims presents for each ref for each position
# arg0 = ref.fasta
# arg1 = file.sam
### apport // v1 => elimination of the background
#########################################################################################################
# main #
#########################################################################################################
my@tabname = split(/\//,$0);
my $scriptname = pop(@tabname);
#############################
&main($ARGV[0],$ARGV[1]); # analyze a file
#########################################################################################################
# SUBS #
#########################################################################################################
sub main { # read the file
my ($refFile,$samfile) = @_;
# 1/ stockage des refs
my%ref_seq;
my%ref_pos_snp_nb;
my%ref_pos_totalsnps;
my%totalreadsByRef;
my%normalbasebypos;
my%ref_pos_depth;
open (F1, "$refFile") || die ("$scriptname --sub main : opening fails $refFile");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
#print STDERR "ref : $li memorised\n";
$ref=$li;
$switch=1;
$ref_seq{$ref}="";
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li;
}
}
close F1;
foreach my$ref (keys %ref_seq){
my@tabseq=split(//,$ref_seq{$ref});
for (my$i=0;$i<scalar(@tabseq);$i++){
my$pos=$i+1;
$normalbasebypos{$ref}{$pos}=$tabseq[$i];
$ref_pos_depth{$ref}{$pos}=0;
if($pos>=$startref && $pos<=$endref){
$ref_pos_totalsnps{$ref}{$pos}=0;
}
}
}
# 2/ reading of the sam file
open (F2, $samfile) || die("$scriptname --sub main : opening fails $samfile\n");
print STDERR "file $samfile opened\n";
while (my$li=<F2>){
next if ($li=~/^@/); # get ride of the header
chomp $li;
my@tab=split(/\t/,$li);
my($read,$flag,$ref,$start,$mapq,$cigar,$seq)=($tab[0],$tab[1],$tab[2],$tab[3],$tab[4],$tab[5],$tab[9]);
$totalreadsByRef{$ref}++;
my$res_comparaison=SamUtils::compareReadToRef($cigar,$start,$seq,$ref_seq{$ref});
#print "$res_comparaison\n";
my$listOfSnps=SamUtils::trimSubseqOfSnplist($res_comparaison,$startref,$endref);
if(defined($listOfSnps)){
#print "$listOfSnps\n";
my@tab_snps= split(/;/,$listOfSnps);
#print "## @tab_snps\n";
foreach my$element (@tab_snps){
my($pos,$snp)=split(/:/,$element);
$ref_pos_snp_nb{$ref}{$pos}{$snp}++;
$ref_pos_totalsnps{$ref}{$pos}++;
}
}
### incrementation depth
my$endpos=SamUtils::returnEndPos($start,$cigar);
for (my$i=$start;$i<=$endpos;$i++){
$ref_pos_depth{$ref}{$i}++;
}
}
close F2;
# # # # # Elimination of background
foreach my$refc (keys %ref_pos_snp_nb){ # for each ref
foreach my$posc (keys $ref_pos_snp_nb{$refc}){ # for each position
$ref_pos_totalsnps{$refc}{$posc}=0;
foreach my$snpc (keys $ref_pos_snp_nb{$refc}{$posc}){ # for each SNP
if ($ref_pos_snp_nb{$refc}{$posc}{$snpc}<=$bruit){ # if snp< noize snp=0
#$ref_pos_snp_nb{$refc}{$posc}{$snpc}=0;
$ref_pos_depth{$refc}{$posc}-=$ref_pos_snp_nb{$refc}{$posc}{$snpc};
delete($ref_pos_snp_nb{$refc}{$posc}{$snpc});
}
else { # if not snp = snp-bruit
$ref_pos_snp_nb{$refc}{$posc}{$snpc}-=$bruit;
$ref_pos_totalsnps{$refc}{$posc}+=$ref_pos_snp_nb{$refc}{$posc}{$snpc}; # total pos += snp
$ref_pos_depth{$refc}{$posc}-=$bruit;
}
}
}
}
# # # # #
# 3/ printing of result
my$outname=$samfile;
$outname=~s/^.*\///;
$outname=~s/\.sam$//i;
foreach my$refc (sort(keys %ref_pos_totalsnps)){ # for each ref
my$outfile=$refc."_counts_snp_from_".$outname.".txt";
open (F3, "> $outfile") || die("$scriptname --sub main : pb ouverture $outfile\n");
print F3 "position\tnumber of reads with snp\ttotal reads\t% snp";
print F3 "\tnormalised snp\tnormalised total\tnormalised % snp";
print F3 "\tnormal base\tsnp:number of reads\n";
foreach my$posc (sort {$a<=>$b}(keys $ref_pos_totalsnps{$refc})){ # pour chaque position
my$proportion;
### non normalised
if($ref_pos_depth{$refc}{$posc}>0){
$proportion=int($ref_pos_totalsnps{$refc}{$posc}/$ref_pos_depth{$refc}{$posc}*10000)/100; # calcul de la proportion de reads mutés
}
else{ $proportion=0;}
my$newpos=$posc-$startref+1;
print F3 "$newpos\t";
print F3 "$ref_pos_totalsnps{$refc}{$posc}\t";
print F3 "$ref_pos_depth{$refc}{$posc}\t";
print F3 "$proportion\t";
### normalised
my$newtotalsnps=int($ref_pos_totalsnps{$refc}{$posc}/$meandepth*100)/100;
#if($newtotalsnps<1){$newtotalsnps=0;}
my$newdepth=$ref_pos_depth{$refc}{$posc}/$meandepth;
if($newdepth<1){$newdepth=0;}
if($newdepth>0){
$proportion=int($newtotalsnps/$newdepth*10000)/100; # calculqtion of the proportion of mutated reads }
else{ $proportion=0;}
$newdepth=int($newdepth*100)/100;
print F3 "$newtotalsnps\t";
print F3 "$newdepth\t";
print F3 "$proportion\t";
### other
print F3 "$normalbasebypos{$refc}{$posc}";
if(defined($ref_pos_snp_nb{$refc}{$posc})){
foreach my$snpc (sort(keys $ref_pos_snp_nb{$refc}{$posc})){ # pour chaque snp
print F3 "\t$snpc:$ref_pos_snp_nb{$refc}{$posc}{$snpc}";
}
}
print F3 "\n";
}
close F3;
}
}
#########################################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name [-h|--help] --startref:int --endref:int <ref.fasta> <file.sam>\n\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.4';
my $lastModif='2016-Aug-21';
my($refFile,$limMin,$limSup,$size,$endRef,$list,$softclip,$tstretchmode);
my$mismatches=0;
# set the options
&GetOptions("h|help" => \&help,
# required
"r|ref:s" => \$refFile,
"m|mismatches:i" => \$mismatches,
"minstart:i" => \$limMin, # start of the reads must be <= to  limmin
"minend:i" => \$limSup, # end of the read must be >= limsup
"size:i" => \$size, # size of the fragment extracted
"endref:i" => \$endRef, # position of the end of the ref(before the N)
"l|list:s" => \$list, # list of tstretchs
# optional
"t|tstretchmode" => \$tstretchmode, # if we want the name of the T-stretch
"S|softclip" => \$softclip # activate checkIfIsCoveringIntervalWithSEnd
);
@ARGV or &help;
# verification of arg
(-e "$ARGV[0]") or &help2($ARGV[0]);
#########################################################################################################
# info #
#########################################################################################################
# input = sam file
# -r = ref fasta
#########################################################################################################
# main #
#########################################################################################################
my@tab = split(/\//,$0);
my $scriptname = pop(@tab);
my %tstretch_chr;
my %tstretch_name;
&readTstretchsList($list);
if(defined($softclip)){
&mainWithS($ARGV[0],$refFile,$mismatches); # analyze a file
}
else{
&main($ARGV[0],$refFile,$mismatches); # analyze a file
}
#########################################################################################################
# SUBS #
#########################################################################################################
sub main { # read the fichier
my ($samfile,$refFile,$mismatches) = @_;
# 1/ stockage of refs
my%ref_seq;
my%tstretch_count;
my$totalreads=0;
my$coveringreads=0;
my$okreads=0;
open (F1, "$refFile") || die ("$scriptname --sub main : openig fails $refFile");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
print STDERR "ref : $li memorised\n";
$ref=$li;
$switch=1;
$ref_seq{$ref}="";
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li;
#$switch=0;
}
}
close F1;
# 2/ reading of the sam file
open (F2, $samfile) || die("$scriptname --sub main : opening fails $samfile\n");
print STDERR "file $samfile opened\n";
while (my$li=<F2>){
next if ($li=~/^@/); # get ride of the header
chomp $li;
my@tab=split(/\t/,$li);
#my($read,$flag,$ref,$start,$mapq,$cigar,$seq)=($tab[0],$tab[1],$tab[2],$tab[3],$tab[4],$tab[5],$tab[9]);
my$read= shift @tab;
my$flag= shift @tab;
my$ref= shift @tab;
my$start= shift @tab;
my$mapq= shift @tab;
my$cigar= shift @tab;
my$other1= shift @tab;
my$other2= shift @tab;
my$other3= shift @tab;
my$other= join ("\t", $other1,$other2,$other3);
my$seq= shift @tab;
my$last= join ("\t", @tab);
if(SamUtils::checkIfIsCoveringInterval($start,$cigar,$limMin,$limSup)){
my$before=$endRef-$start; # lenght to pull out before
# extraire fragment
my$fragment="youpi";
$fragment=SamUtils::extractFrag($before,$cigar,$seq,$size);
if(defined($tstretchmode)){
if(defined($tstretch_name{$fragment})){
$ref=$tstretch_name{$fragment};
}
else{
$ref="*";
}
}
else{
if(defined($tstretch_chr{$fragment})){
$ref=$tstretch_chr{$fragment};
}
else{
$ref="*";
}
}
print "$read\t$flag\t$ref\t$start\t$mapq\t$cigar\t$other\t$seq\t$last\n";
}
}
close F2;
print STDERR "end Sam file\n";
}
#########################################################################################################
sub mainWithS { # read the file
my ($samfile,$refFile,$mismatches) = @_;
# 1/ stockage of refs
my%ref_seq;
my%tstretch_count;
my$totalreads=0;
my$coveringreads=0;
my$okreads=0;
open (F1, "$refFile") || die ("$scriptname --sub mainWithS : opening fails $refFile");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
$ref=$li;
$switch=1;
$ref_seq{$ref}="";
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li;
}
}
close F1;
# 2/ reading of sam
open (F2, $samfile) || die("$scriptname --sub mainWithS : pb ouverture $samfile\n");
print STDERR "file $samfile opened\n";
while (my$li=<F2>){
next if ($li=~/^@/); # get ride of the header
chomp $li;
my@tab=split(/\t/,$li);
#my($read,$flag,$ref,$start,$mapq,$cigar,$seq)=($tab[0],$tab[1],$tab[2],$tab[3],$tab[4],$tab[5],$tab[9]);
my$read= shift @tab;
my$flag= shift @tab;
my$ref= shift @tab;
my$start= shift @tab;
my$mapq= shift @tab;
my$cigar= shift @tab;
my$other1= shift @tab;
my$other2= shift @tab;
my$other3= shift @tab;
my$other= join ("\t", $other1,$other2,$other3);
my$seq= shift @tab;
my$last= join ("\t", @tab);
if(SamUtils::checkIfIsCoveringIntervalWithSEnd($start,$cigar,$limMin,$limSup)){
my$before=$endRef-$start; # lenght to pull out before
# extract fragment
my$fragment="youpi";
$fragment=SamUtils::extractFrag($before,$cigar,$seq,$size);
if(defined($tstretchmode)){
if(defined($tstretch_name{$fragment})){
$ref=$tstretch_name{$fragment};
}
else{
$ref="*";
}
}
else{
if(defined($tstretch_chr{$fragment})){
$ref=$tstretch_chr{$fragment};
}
else{
$ref="*";
}
}
#print "$read\t$flag\t$ref\t$start\t$mapq\t$cigar\t$seq\n";
print "$read\t$flag\t$ref\t$start\t$mapq\t$cigar\t$other\t$seq\t$last\n";
}
}
close F2;
print STDERR "end Sam file\n";
}
#########################################################################################################
sub readTstretchsList { # read thefichier
my $file = shift @_;
open (F1, $file) || die("$scriptname --sub readTstretchsList : pb ouverture $file\n");
while (my$li=<F1>){
next if ($li=~/^#/); # get rie of the header
chomp $li;
my@tab=split(/\t/,$li);
$tstretch_chr{$tab[0]}=$tab[1];
if(scalar(@tab)>2){$tstretch_name{$tab[0]}=$tab[2];}
}
close F1;
}
#########################################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name\t[options] <sam file>\n";
print STDERR "\t# required\n";
print STDERR "\t\t[-r|--ref]<fasta reference file>\n";
print STDERR "\t\t[-m|--mismatches]<mismatches>\n";
print STDERR "\t\t[--minstart:int] = minimal start of reads\n";
print STDERR "\t\t[--minend:int] = minimal end of reads\n";
print STDERR "\t\t[--size:int] = tstretchs size +1\n";
print STDERR "\t\t[--endref:int] = end of reference sequence (endpos of tstretch)\n";
print STDERR "\t\t[-l|--list] <tstretchs tabulated list>\n";
print STDERR "\t# optional\n";
print STDERR "\t\t[-h|--help]\n";
print STDERR "\t\t[-t|--tstretchmode] if tstretchs name wanted for output refs\n";
print STDERR "\t\t[-S|--softclip] if reads longer than reference\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
my $VERSION='0.2';
my $lastModif='2016-Mar-04';
my($tstretchList,$fastqFile);
# set the options
&GetOptions("h|help" => \&help,
"l|list:s" => \$tstretchList,
"f|fastq:s" => \$fastqFile
);
#@ARGV or &help;
# verification of the arg
(-e "$tstretchList") or &help2($tstretchList);
(-e "$fastqFile") or &help2($fastqFile);
#########################################################################################################
# main #
#########################################################################################################
# goal : make group of reads depending on the T-Stretchs
my %tstretch_chr; # list of tstretchs to analyze
&readTstretchList($tstretchList); # analyze a file
foreach my$element (keys %tstretch_chr){
&searchTstretch($tstretch_chr{$element},$element,$fastqFile); # analyze @_
}
#########################################################################################################
# SUBS #
#########################################################################################################
sub readTstretchList { # reads the t-stretch list format [0]=tstretch [1]=chr
my $file = shift @_;
open (F1, $file) || die("file opening failed\n");
while (my$li=<F1>){
next if ($li=~/^#/); # get ride off the header
chomp $li;
my@tab=split(/\t/,$li);
$tstretch_chr{$tab[0]}=$tab[1]; # stock the tstretch
print STDERR "Tstretch $tab[1] : $tab[0]\n";
}
close F1;
print STDERR "#-#-# Tstretch list memorized #-#-#\n";
}
#########################################################################################################
sub searchTstretch {
my($chr,$tstretch,$file) = @_; # => in the order of @_
my$filename = $file;
$filename =~ s/.*\///;
my$fileout = $chr."_".$tstretch."_from_".$filename;
`grep -B1 -A2 "$tstretch" $file | grep -v "^--\$" > $fileout`;
#print "$chr : $tstretch =";
#`grep -c "$tstretch" $file`;
#print " occurences\n";
print STDERR "Tstretch $chr : $tstretch done\n";
}
#########################################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name [-h|--help] <all_snp file>\n";
print STDERR "The snp file made by ListSnp.pl needs to be sliced by refs => one ref at the same time !\n\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.8';
my $lastModif='2016-Aug-21';
my@t = split(/\//,$0);
my $scriptname = pop(@t);
my($liminf,$limsup);
# set the options
&GetOptions("h|help" => \&help,
"liminf:i" => \$liminf,
"limsup:i" => \$limsup,
);
# arg0 = ref fasta files
@ARGV or &help;
#########################################################################################################
# infos #
#########################################################################################################
# list of all SNP on all reads
#########################################################################################################
# main #
#########################################################################################################
# stockage of refs
my%ref_seq;
open (F1, "$ARGV[0]") || die("$scriptname --step open Refs : opening fails $ARGV[0]\n");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
print STDERR "ref : $li memorised\n";
$ref=$li;
$switch=1;
$ref_seq{$ref}="";
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li;
#$switch=0;
}
}
close F1;
#########################################################################################################
my%countSnps;
my$totalreads=0;
my%ref_totalreads;
my%ref_countSnps;
print "#reference\tread\tstart_pos\tend_pos\tSNPs\n";
# while :
while (my$li=<STDIN>) {
next if ($li=~/^@/);
chomp $li;
my@tab=split(/\t/, $li);
my($read,$flag,$ref,$start,$cigar,$seq)=($tab[0],$tab[1],$tab[2],$tab[3],$tab[5],$tab[9]);
if ($flag != 12 && $flag != 13 && $flag != 69 && $flag != 77 && $flag != 117 && $flag != 133 && $flag != 141 && $flag != 181 && $flag < 512 || $flag >= 1024){
$totalreads++;
$ref_totalreads{$ref}++;
#if($totalreads/10000==int($totalreads/10000)){print STDERR "$totalreads reads processed\n";}
if(!(defined($ref_seq{$ref}))){print STDERR "error : no refseq available for \"$ref\"\n";}
my$res_comparaison;
$res_comparaison=SamUtils::compareReadToRef($cigar,$start,$seq,$ref_seq{$ref});
## cut res_comparaison
if(defined($res_comparaison) && defined($liminf) && defined($limsup)){
my$newres_comparaison=SamUtils::trimSubseqOfSnplist($res_comparaison,$liminf,$limsup);
$res_comparaison=$newres_comparaison;
}
##
my$nbSnp=SamUtils::countSnpFromSnplist($res_comparaison);
$countSnps{$nbSnp}++;
$ref_countSnps{$ref}{$nbSnp}++;
my$endpos=SamUtils::returnEndPos($start,$cigar);
## new start new end
=head
if(defined($liminf) && defined($limsup)){
$start=$liminf;
my$newEnd=$endpos-$liminf;
if($newEnd>$limsup){$newEnd=$limsup;}
$endpos=$newEnd;
}
=cut
##
if(defined($res_comparaison)){
print "$ref\t$read\t$start\t$endpos\t$res_comparaison\n"; # ref ; read ; start ; stop ; SNPs
}
else{
print "$ref\t$read\t$start\t$endpos\t\n"; # ref ; read ; start ; stop ; SNPs
}
}
}# end while
my$multiSnp=0;
print STDERR "total reads :\t$totalreads\n";
foreach my$k (sort(keys %countSnps)){
my$pourcentagec=$countSnps{$k}/$totalreads*100;
if($k<=2){print STDERR "reads with $k snps :\t$countSnps{$k}\t$pourcentagec%\n";}
if($k>2){
$multiSnp+=$countSnps{$k};
}
}
my$pourcentagec2=$multiSnp/$totalreads*100;
print STDERR "reads with more than 2 snps :\t$multiSnp\t$pourcentagec2%\n";
#########################################################################################################
foreach my$refc (sort(keys %ref_totalreads)){
my$multiSnp2=0;
my$totalR=$ref_totalreads{$refc};
print STDERR "------------------------------------------\n";
print STDERR "reference : $refc\n";
print STDERR "total reads :\t$ref_totalreads{$refc}\n";
foreach my$k (sort(keys $ref_countSnps{$refc})){
my$pourcentagec=$ref_countSnps{$refc}{$k}/$totalR*100;
if($k<=2){print STDERR "reads with $k snps :\t$ref_countSnps{$refc}{$k}\t$pourcentagec%\n";}
if($k>2){
$multiSnp2+=$ref_countSnps{$refc}{$k};
}
}
my$pourcentagec2b=$multiSnp2/$totalR*100;
print STDERR "reads with more than 2 snps :\t$multiSnp2\t$pourcentagec2b%\n";
}
#########################################################################################################
# SUBS #
#########################################################################################################
sub help {
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: STDIN \| $0 [-h|--help] <reference_sequences_used_in_mapping.fasta>\n";
print STDERR "USAGE: STDIN = cat/awk from .sam file ; filtered by quality\n";
print STDERR "Example :\n";
print STDERR "awk '{ if (\$5>=30) { print \$0 ; } }' filein.sam | scripts/ListSnp.pl ref.fasta > fileout.txt\n";
exit(1) ;
}
bash Script
#!/bin/sh
##########################################################################################
# Info #
##########################################################################################
#
# required perl scripts :
#
# GrepTstretchsInFastq.pl
# CheckIfReadIsCoveringInterval.pl
# GetSamWithRefReplacedByTstretchs.pl
# CountPolymorphisms.pl
# ListSnpModules_v2.pl
#
# >> Samutils.pm required !
#
#
##########################################################################################
# Environment variables #
##########################################################################################
refTstretchs=/PATH/liste_tstretchs.csv
sizeTstretchs=16
fastq1=/PATH/File_score10_paired_fused.fastq
fastqName1=File_score10_paired_fused
fastq2=/PATH/File_score10_1P_notFused.fastq
fastqName2=File_score10_1P_notFused
fastq3=/PATH/File_score10_2P_notFused.fastq
fastqName3=File_score10_2P_notFused
fastq4=/PATH/File_score10_1U.fastq
fastqName4=File_score10_1U
fastq5=/PATH/File_score10_2U.fastq
fastqName5=File_score10_2U
##########################################################################################
refFasta=/PATH/reference_transcribed_sequence.fasta
startRef=21
endref=140
endTstretch=156
lim=20
refFasta2=/PATH/reference_transcribed_sequence.fasta
##########################################################################################
# PIPELINE #
##########################################################################################
# A/ Raw reads Grep :
# a)
GrepTstretchsInFastq.pl \
-l $refTstretchs \
-f $fastq1
GrepTstretchsInFastq.pl \
-l $refTstretchs \
-f $fastq2
GrepTstretchsInFastq.pl \
-l $refTstretchs \
-f $fastq3
GrepTstretchsInFastq.pl \
-l $refTstretchs \
-f $fastq4
GrepTstretchsInFastq.pl \
-l $refTstretchs \
-f $fastq5
# b)
fastqName1b=$fastqName1"_withT-stretchs.fastq"
fastqName2b=$fastqName2"_withT-stretchs.fastq"
fastqName3b=$fastqName3"_withT-stretchs.fastq"
fastqName4b=$fastqName4"_withT-stretchs.fastq"
fastqName5b=$fastqName5"_withT-stretchs.fastq"
cat *$fastqName1.fastq > $fastqName1b
cat *$fastqName2.fastq > $fastqName2b
cat *$fastqName3.fastq > $fastqName3b
cat *$fastqName4.fastq > $fastqName4b
cat *$fastqName5.fastq > $fastqName5b
mkdir fastq_par_Tstretchs
mv *$fastqName*.fastq fastq_par_Tstretchs/.
echo "step A done"
##########################################################################################
#B/ Mapping
#a)
bwa mem -t32 -v2 $refFasta $fastqName1b 2> log_mapping.txt | grep -v ^@ \
| awk '{if($2 != 4 && $2 != 12 && $2 != 13 && $2 != 20 && $2 != 69 && $2 != 77 && $2 != 117 && $2 !=
133 && $2 != 141 && $2 != 181 && ($2 < 512 || $2 >= 1024)){print $0;}}' \
> $fastqName1"_vs_transcrit_mapped.sam"
bwa mem -t32 -v2 $refFasta $fastqName2b 2>> log_mapping.txt | grep -v ^@ \
| awk '{if($2 != 4 && $2 != 12 && $2 != 13 && $2 != 20 && $2 != 69 && $2 != 77 && $2 != 117 && $2 !=
133 && $2 != 141 && $2 != 181 && ($2 < 512 || $2 >= 1024)){print $0;}}' \
> $fastqName2"_vs_transcrit_mapped.sam"
bwa mem -t32 -v2 $refFasta $fastqName3b 2>> log_mapping.txt | grep -v ^@ \
| awk '{if($2 != 4 && $2 != 12 && $2 != 13 && $2 != 20 && $2 != 69 && $2 != 77 && $2 != 117 && $2 !=
133 && $2 != 141 && $2 != 181 && ($2 < 512 || $2 >= 1024)){print $0;}}' \
> $fastqName3"_vs_transcrit_mapped.sam"
bwa mem -t32 -v2 $refFasta $fastqName4b 2>> log_mapping.txt | grep -v ^@ \
| awk '{if($2 != 4 && $2 != 12 && $2 != 13 && $2 != 20 && $2 != 69 && $2 != 77 && $2 != 117 && $2 !=
133 && $2 != 141 && $2 != 181 && ($2 < 512 || $2 >= 1024)){print $0;}}' \
> $fastqName4"_vs_transcrit_mapped.sam"
bwa mem -t32 -v2 $refFasta $fastqName5b 2>> log_mapping.txt | grep -v ^@ \
| awk '{if($2 != 4 && $2 != 12 && $2 != 13 && $2 != 20 && $2 != 69 && $2 != 77 && $2 != 117 && $2 !=
133 && $2 != 141 && $2 != 181 && ($2 < 512 || $2 >= 1024)){print $0;}}' \
> $fastqName5"_vs_transcrit_mapped.sam"
#b)
cat *_mapped.sam > all_reads_mapped.sam
echo "step B done"
##########################################################################################
#C/ Selection of reads covering all the transcribed sequence
CheckIfReadIsCoveringInterval.pl --minstart $startRef --minend $endref all_reads_mapped.sam >
all_reads_coveringTranscript_mapped.sam
echo "step C done"
##########################################################################################
#D/ Replace of the mapped references by the corresponding chromosome (give the result by cluster)
#a)
GetSamWithRefReplacedByTstretchs.pl \
-r $refFasta \
-m 0 --minstart $startRef --minend $endTstretch --size $sizeTstretchs --endref $endref \
-l $refTstretchs \
-S all_reads_coveringTranscript_mapped.sam > all_reads_splitByTstretchs_mapped.sam
#(to check)
awk '{print $3;}' all_reads_splitByTstretchs_mapped.sam | sort | uniq -c
#b) elimination of reads « * » in a) = don't have the t-stretch in the right place
awk '{if($3~"chr"){print $0;}}' all_reads_splitByTstretchs_mapped.sam >
all_reads_splitByTstretchs_mapped_filtered.sam
echo "step D done"
##########################################################################################
#E/ Polymorphisms analyze
CountPolymorphisms.pl --startref $startRef --endref $endref $refFasta2
all_reads_splitByTstretchs_mapped_filtered.sam
cat all_reads_splitByTstretchs_mapped_filtered.sam | ListSnpModules_v2.pl --liminf $lim --limsup $endref
$refFasta2 \
> all_reads_splitByTstretchs_polymorphisms.txt 2> all_reads_splitByTstretchs_stats.txt
echo "step E done"
##########################################################################################
##########################################################################################
echo "end"
Script perl CheckIfReadIsCoveringInterval.pl
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.3';
my $lastModif='2016-Jun-09';
my($limMin,$limSup,$softclip);
# set the options
&GetOptions("h|help" => \&help,
"minstart:i" => \$limMin, # read start must be <= limmin
"minend:i" => \$limSup, # read end must be >= limsup
"S|softclip" => \$softclip # if reads mapped exceed the reference sequence
);
@ARGV or &help;
# verification of arg
(-e "$ARGV[0]") or &help2($ARGV[0]);
##########################################################################################
# main #
##########################################################################################
if(defined($softclip)){
&mainWithS($ARGV[0]);
}
else{
&main($ARGV[0]);
}
##########################################################################################
# SUBS #
##########################################################################################
sub main { # read the file
my $samfile = shift @_;
my ($totalreads,$coveringreads); # nb of total reads in sam file, nb of covering reads
open (F1, $samfile) || die("can't open the file\n");
while (my$li=<F1>){
next if ($li=~/^@/); # filter header
$totalreads++;
chomp $li;
my@tab=split(/\t/,$li);
# needed informations
my($read,$ref,$start,$cigar,$seq)=($tab[0],$tab[2],$tab[3],$tab[5],$tab[9]);
# if(<=$limMin && >=$limSup) ==1
if(SamUtils::checkIfIsCoveringInterval($start,$cigar,$limMin,$limSup)){
$coveringreads++; # count read as covering
print "$li\n"; # let read pass through
}
}
close F1;
print STDERR "total processed reads : $totalreads\n";
print STDERR "total covering reads : $coveringreads\n";
}
##########################################################################################
sub mainWithS { # lit le fichier
my $samfile = shift @_;
my ($totalreads,$coveringreads); # nb of total reads in
sam file, nb of covering reads
open (F1, $samfile) || die("pb ouverture fichier\n");
while (my$li=<F1>){
next if ($li=~/^@/); # filter header
$totalreads++;
chomp $li;
my@tab=split(/\t/,$li);
# needed informations
my($read,$ref,$start,$cigar,$seq)=($tab[0],$tab[2],$tab[3],$tab[5],$tab[9]);
# if(<=$limMin && >=$limSup) ==1
if(SamUtils::checkIfIsCoveringIntervalWithSEnd($start,$cigar,$limMin,$limSup)){
$coveringreads++; # count read as covering
print "$li\n"; # let read pass through
}
}
close F1;
print STDERR "total processed reads : $totalreads\n";
print STDERR "total covering reads : $coveringreads\n";
}
##########################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name [-h|--help] --minstart:int --minend:int [-S|--softclip] <sam file>\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
Script perl GetSamWithRefReplacedByTstretchs.pl
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.4';
my $lastModif='2016-Aug-21';
my@tab = split(/\//,$0);
my$scriptname = pop(@tab);
my($refFile,$limMin,$limSup,$size,$endRef,$list,$softclip,$tstretchmode);
my$mismatches=0;
# set the options
&GetOptions("h|help" => \&help,
# required
"r|ref:s" => \$refFile,
"m|mismatches:i" => \$mismatches,
"minstart:i" => \$limMin, # read start must be <= limmin
"minend:i" => \$limSup, # read end must be >= limsup
"size:i"=> \$size, # required extract fragment size
"endref:i" => \$endRef, # end reference position
"l|list:s" => \$list, # tstretchs list
# optional
"t|tstretchmode" => \$tstretchmode, # if we want Tstretch name in new refs
# // normal mode is chr name
"S|softclip" => \$softclip # if wanted fragment is out of reference boundaries
);
@ARGV or &help;
# sam file existence control
(-e "$ARGV[0]") or &help2($ARGV[0]);
##########################################################################################
# info #
##########################################################################################
# input = sam file
# -r = ref.fasta
# -l = tstretchlist.csv/tsv/txt
##########################################################################################
# main #
##########################################################################################
my %tstretch_chr; # store corresponding chr for each T-stretch
my %tstretch_name; # store corresponding T-stretch name for each T-stretch
&readTstretchsList($list);
if(defined($softclip)){
&mainWithS($ARGV[0],$refFile,$mismatches);
}
else{
&main($ARGV[0],$refFile,$mismatches);
}
##########################################################################################
# SUBS #
##########################################################################################
sub main {
my ($samfile,$refFile,$mismatches) = @_;
# 1/ references recording
my%ref_seq; # key=reference name ; value=sequence
my%tstretch_count; # key=Tstretch ; value=counts
my$totalreads=0; # total reads in Sam file
my$coveringreads=0; # total reads covering the interval
open (F1, "$refFile") || die ("$scriptname --sub main : pb opening $refFile");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
print STDERR "ref : $li memorised\n";
$ref=$li;
$switch=1;
$ref_seq{$ref}=""; # store current ref
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li; # add seq to current ref
}
}
close F1;
# 2/ Sam file processing
open (F2, $samfile) || die("$scriptname --sub main : pb ouverture $samfile\n");
print STDERR "file $samfile opened\n";
while (my$li=<F2>){
next if ($li=~/^@/); # header
chomp $li;
my@tab=split(/\t/,$li);
my$read= shift @tab;
my$flag= shift @tab;
my$ref= shift @tab;
my$start= shift @tab;
my$mapq= shift @tab;
my$cigar= shift @tab;
my$other1= shift @tab;
my$other2= shift @tab;
my$other3= shift @tab;
my$other= join ("\t", $other1,$other2,$other3);
my$seq= shift @tab;
my$last= join ("\t", @tab);
# check if current read is covering the defined interval
if(SamUtils::checkIfIsCoveringInterval($start,$cigar,$limMin,$limSup)){
my$before=$endRef-$start; # read length : start to endref
# extract fragment
my$fragment="";
$fragment=SamUtils::extractFrag($before,$cigar,$seq,$size);
if(defined($tstretchmode)){ # if we want Tstretch name as new refs
if(defined($tstretch_name{$fragment})){
$ref=$tstretch_name{$fragment};
}
else{
$ref="*";
}
}
else{ # if we want chr name as new refs
if(defined($tstretch_chr{$fragment})){
$ref=$tstretch_chr{$fragment};
}
else{
$ref="*";
}
}
print "$read\t$flag\t$ref\t$start\t$mapq\t$cigar\t$other\t$seq\t$last\n";
}
}
close F2;
print STDERR "end Sam file\n";
}
##########################################################################################
sub mainWithS { # lit le fichier
my ($samfile,$refFile,$mismatches) = @_;
# 1/ references recording
my%ref_seq; # key=reference name ; value=sequence
my%tstretch_count; # key=Tstretch ; value=counts
my$totalreads=0; # total reads in Sam file
my$coveringreads=0; # total reads covering the interval
open (F1, "$refFile") || die ("$scriptname --sub main : pb opening $refFile");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
print STDERR "ref : $li memorised\n";
$ref=$li;
$switch=1;
$ref_seq{$ref}=""; # store current ref
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li; # add seq to current ref
}
}
close F1;
# 2/ Sam file processing
open (F2, $samfile) || die("$scriptname --sub main : pb ouverture $samfile\n");
print STDERR "file $samfile opened\n";
while (my$li=<F2>){
next if ($li=~/^@/); # header
chomp $li;
my@tab=split(/\t/,$li);
my$read= shift @tab;
my$flag= shift @tab;
my$ref= shift @tab;
my$start= shift @tab;
my$mapq= shift @tab;
my$cigar= shift @tab;
my$other1= shift @tab;
my$other2= shift @tab;
my$other3= shift @tab;
my$other= join ("\t", $other1,$other2,$other3);
my$seq= shift @tab;
my$last= join ("\t", @tab);
# check if current read is covering the defined interval
if(SamUtils::checkIfIsCoveringIntervalWithSEnd($start,$cigar,$limMin,$limSup)){
my$before=$endRef-$start; # read length : start to endref
# extract fragment
my$fragment="";
$fragment=SamUtils::extractFrag($before,$cigar,$seq,$size);
if(defined($tstretchmode)){ # if we want Tstretch name as new refs
if(defined($tstretch_name{$fragment})){
$ref=$tstretch_name{$fragment};
}
else{
$ref="*";
}
}
else{ # if we want chr name as new refs
if(defined($tstretch_chr{$fragment})){
$ref=$tstretch_chr{$fragment};
}
else{
$ref="*";
}
}
print "$read\t$flag\t$ref\t$start\t$mapq\t$cigar\t$other\t$seq\t$last\n";
}
}
close F2;
print STDERR "end Sam file\n";
}
##########################################################################################
sub readTstretchsList {
my $file = shift @_;
open (F1, $file) || die("$scriptname --sub readTstretchsList : pb ouverture $file\n");
while (my$li=<F1>){
next if ($li=~/^#/); # for header
chomp $li;
my@tab=split(/\t/,$li);
$tstretch_chr{$tab[0]}=$tab[1]; # store : key=current Tstretch ; val=chr name
if(scalar(@tab)>2){
# store : key=current Tstretch ; val=Tstretch name
$tstretch_name{$tab[0]}=$tab[2];
}
}
close F1;
}
##########################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name\t[options] <sam file>\n";
print STDERR "\t# required\n";
print STDERR "\t\t[-r|--ref]<fasta reference file>\n";
print STDERR "\t\t[-m|--mismatches]<mismatches>\n";
print STDERR "\t\t[--minstart:int] = minimal start of reads\n";
print STDERR "\t\t[--minend:int] = minimal end of reads\n";
print STDERR "\t\t[--size:int] = tstretchs size +1\n";
print STDERR "\t\t[--endref:int] = end of reference sequence (endpos of tstretch)\n";
print STDERR "\t\t[-l|--list] <tstretchs tabulated list>\n";
print STDERR "\t# optional\n";
print STDERR "\t\t[-h|--help]\n";
print STDERR "\t\t[-t|--tstretchmode] if tstretchs name wanted for output refs\n";
print STDERR "\t\t[-S|--softclip] if reads longer than reference\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
Script perl CountPolymorphisms_v4.pl
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use SamUtils;
my $VERSION='0.2';
my $lastModif='2016-Aug-21';
my@tabname = split(/\//,$0);
my $scriptname = pop(@tabname);
my $startref=0;
my $endref;
my $meandepth=1;
my $bruit=10;
# set the options
&GetOptions("h|help" => \&help,
"startref:i" => \$startref, # start reference position
"endref:i" => \$endref, # end reference position
"meandepth:i" => \$meandepth # if we want to normalise by sequencing depth
);
@ARGV or &help;
# file existence ctrl
(-e "$ARGV[0]") or &help2($ARGV[0]);
###########################################################################################
# info
###########################################################################################
# count polymorphisms present at each position l
# arg0 = ref.fasta
# arg1 = file.sam
### apport // v1
###########################################################################################
# main
###########################################################################################
&main($ARGV[0],$ARGV[1]);
###########################################################################################
# SUBS
###########################################################################################
sub main {
my ($refFile,$samfile) = @_;
# 1/ references recording
my%ref_seq; # key=reference name ; value=sequence
my%ref_pos_snp_nb; # key=reference name ; key2=position ; key3=snp ;
value=count
my%ref_pos_totalsnps; # key=reference name ; key2=position ; value=total of all snps
my%totalreadsByRef; # key=rererence name ; value= total of correspondig reads
my%normalbasebypos; # key=reference name ; key2=position ; value=base
my%ref_pos_depth; # key=reference name ; key2=position ; value=position
depth
open (F1, "$refFile") || die ("$scriptname --sub main : pb ouverture $refFile");
my$ref="";
my$switch=0;
while (my$li=<F1>) {
if($li=~/^>/){
chomp $li;
$li=~s/^>//;
$li=~s/^[\W]+//;
$li=~s/[\W]+$//;
$li=~s/ .+$//;
$ref=$li;
$switch=1;
$ref_seq{$ref}=""; # store current ref
next;
}
elsif ($switch==1) {
chomp $li;
$ref_seq{$ref}.=$li; # add seq to current ref
}
}
close F1;
foreach my$ref (keys %ref_seq){
my@tabseq=split(//,$ref_seq{$ref});
for (my$i=0;$i<scalar(@tabseq);$i++){
my$pos=$i+1;
$normalbasebypos{$ref}{$pos}=$tabseq[$i];
$ref_pos_depth{$ref}{$pos}=0;
if($pos>=$startref && $pos<=$endref){
$ref_pos_totalsnps{$ref}{$pos}=0;
}
}
}
# 2/ sam file processing
open (F2, $samfile) || die("$scriptname --sub main : pb ouverture $samfile\n");
print STDERR "file $samfile opened\n";
while (my$li=<F2>){
next if ($li=~/^@/);
chomp $li;
my@tab=split(/\t/,$li);
my($read,$flag,$ref,$start,$mapq,$cigar,$seq)=($tab[0],$tab[1],$tab[2],$tab[3],
$tab[4],$tab[5],$tab[9]);
$totalreadsByRef{$ref}++;
# search all existing SNPs
my$res_comparaison=SamUtils::compareReadToRef($cigar,$start,$seq,
$ref_seq{$ref});
# keap only covering interval SNPs
my$listOfSnps=SamUtils::trimSubseqOfSnplist($res_comparaison,$startref,$endref);
if(defined($listOfSnps)){
my@tab_snps= split(/;/,$listOfSnps);
foreach my$element (@tab_snps){
my($pos,$snp)=split(/:/,$element);
$ref_pos_snp_nb{$ref}{$pos}{$snp}++;
$ref_pos_totalsnps{$ref}{$pos}++;
}
}
### depth incrementation
my$endpos=SamUtils::returnEndPos($start,$cigar);
for (my$i=$start;$i<=$endpos;$i++){
$ref_pos_depth{$ref}{$i}++;
}
}
close F2;
# # # # # Sequencing noise elimination
foreach my$refc (keys %ref_pos_snp_nb){
# foreach ref
foreach my$posc (keys $ref_pos_snp_nb{$refc}){
# foreach position
$ref_pos_totalsnps{$refc}{$posc}=0;
foreach my$snpc (keys $ref_pos_snp_nb{$refc}{$posc}){
# foreach existing snp
if ($ref_pos_snp_nb{$refc}{$posc}{$snpc}<=$bruit){
# if snp< bruit snp=0
$ref_pos_depth{$refc}{$posc}-=$ref_pos_snp_nb{$refc}{$posc}
{$snpc};
delete($ref_pos_snp_nb{$refc}{$posc}{$snpc});
}
else {
# else snp = snp-bruit
$ref_pos_snp_nb{$refc}{$posc}{$snpc}-=$bruit;
$ref_pos_totalsnps{$refc}{$posc}+=$ref_pos_snp_nb{$refc}
{$posc}{$snpc}; # total pos += snp
$ref_pos_depth{$refc}{$posc}-=$bruit;
}
}
}
}
# # # # #
# 3/ Results impression
my$outname=$samfile;
$outname=~s/^.*\///;
$outname=~s/\.sam$//i;
foreach my$refc (sort(keys %ref_pos_totalsnps)){
# foreach ref
my$outfile=$refc."_counts_snp_from_".$outname.".txt";
open (F3, "> $outfile") || die("$scriptname --sub main : pb ouverture $outfile\n");
print F3 "position\tnumber of reads with snp\ttotal reads\t% snp";
print F3 "\tnormalised snp\tnormalised total\tnormalised % snp";
print F3 "\tnormal base\tsnp:number of reads\n";
foreach my$posc (sort {$a<=>$b}(keys $ref_pos_totalsnps{$refc})){
# foreach position
my$proportion;
### non normalise
if($ref_pos_depth{$refc}{$posc}>0){
# mutated reads proportion computing
$proportion=int($ref_pos_totalsnps{$refc}{$posc}/
$ref_pos_depth{$refc}{$posc}*10000)/100;
}
else{ $proportion=0;}
my$newpos=$posc-$startref+1;
print F3 "$newpos\t";
print F3 "$ref_pos_totalsnps{$refc}{$posc}\t";
print F3 "$ref_pos_depth{$refc}{$posc}\t";
print F3 "$proportion\t";
### normalise
my$newtotalsnps=int($ref_pos_totalsnps{$refc}{$posc}/
$meandepth*100)/100;
my$newdepth=$ref_pos_depth{$refc}{$posc}/$meandepth;
if($newdepth<1){$newdepth=0;}
if($newdepth>0){
# mutated reads proportion computing
$proportion=int($newtotalsnps/$newdepth*10000)/100;
}
else{ $proportion=0;}
$newdepth=int($newdepth*100)/100;
print F3 "$newtotalsnps\t";
print F3 "$newdepth\t";
print F3 "$proportion\t";
### other
print F3 "$normalbasebypos{$refc}{$posc}";
if(defined($ref_pos_snp_nb{$refc}{$posc})){
foreach my$snpc (sort(keys $ref_pos_snp_nb{$refc}{$posc})){
# foreach existing snp
print F3 "\t$snpc:$ref_pos_snp_nb{$refc}{$posc}{$snpc}";
}
}
print F3 "\n";
}
close F3;
}
}
###########################################################################################
sub help {
my@tab = split(/\//,$0);
my $name = pop(@tab);
print STDERR "\n";
print STDERR join("-", $0, $VERSION, $lastModif), "\n";
print STDERR "USAGE: $name [-h|--help] --startref:int --endref:int <ref.fasta> <file.sam>\n\n";
exit(1) ;
}
sub help2 {
my $file = shift @_;
print STDERR "The file \"$file\" doesn't exist or is not at the specified/current location\n";
exit(1) ;
}
# end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment