#Flags and scores in Tophat output
There are two functions 'bool rewrite_sam_record()', with different sets of parameters.
The first one is used for singletons the second for paired alignements:
This function is called to generate SAM records of singletons tophat_reports.cpp:656
bool rewrite_sam_record(GBamWriter& bam_writer,
const RefSequenceTable& rt,
const BowtieHit& bh,
const char* bwt_buf,
const char* read_alt_name,
FragmentType insert_side,
int num_hits,
const BowtieHit* next_hit,
bool primary,
int hitIndex)
{}
This function is called to generate SAM records of paired reads tophat_reports.cpp:783
bool rewrite_sam_record(GBamWriter& bam_writer,
const RefSequenceTable& rt,
const BowtieHit& bh,
const char* bwt_buf,
const char* read_alt_name,
const InsertAlignmentGrade& grade,
FragmentType insert_side,
const BowtieHit* partner,
int num_hits,
const BowtieHit* next_hit,
bool primary,
int hitIndex)
{}
int flag=atoi(sam_toks[1].c_str()); //FLAG
if (insert_side != FRAG_UNPAIRED) {
//flag = atoi(sam_toks[1].c_str());
// mark this as a singleton mate
flag |= 0x0001;
if (insert_side == FRAG_LEFT)
flag |= 0x0040;
else if (insert_side == FRAG_RIGHT)
flag |= 0x0080;
flag |= 0x0008;
//char flag_buf[64];
//sprintf(flag_buf, "%d", flag);
//sam_toks[t] = flag_buf;
}
if (!primary)
flag |= 0x100;
The MAPQ reports the number of mapping locations. It's set to 50 for single mappers and decreases at increasing number of mapping locations. (see below)
int mapQ = 50;
if (num_hits > 1) {
double err_prob = 1 - (1.0 / num_hits);
mapQ = (int)(-10.0 * log(err_prob) / log(10.0));
}
int flag = atoi(sam_toks[1].c_str());
// 0x0010 (strand of query) is assumed to be set correctly
// to begin with
flag |= BAM_FPAIRED; //it must be paired
if (insert_side == FRAG_LEFT)
flag |= BAM_FREAD1;
else if (insert_side == FRAG_RIGHT)
flag |= BAM_FREAD2;
if (!primary)
flag |= BAM_FSECONDARY;
...
if (partner) {
...
mate_pos = partner->left() + 1;
if (grade.happy())
flag |= BAM_FPROPER_PAIR;
if (partner->antisense_align())
flag |= BAM_FMREVERSE;
...
}
else {
mate_pos = 0;
flag |= BAM_FMUNMAP;
}
The flag constants are defined in bam.h of samtools
/*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
#define BAM_FPAIRED 1
/*! @abstract the read is mapped in a proper pair */
#define BAM_FPROPER_PAIR 2
/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
#define BAM_FUNMAP 4
/*! @abstract the mate is unmapped */
#define BAM_FMUNMAP 8
/*! @abstract the read is mapped to the reverse strand */
#define BAM_FREVERSE 16
/*! @abstract the mate is mapped to the reverse strand */
#define BAM_FMREVERSE 32
/*! @abstract this is read1 */
#define BAM_FREAD1 64
/*! @abstract this is read2 */
#define BAM_FREAD2 128
/*! @abstract not primary alignment */
#define BAM_FSECONDARY 256
/*! @abstract QC failure */
#define BAM_FQCFAIL 512
/*! @abstract optical or PCR duplicate */
#define BAM_FDUP 1024
The BAM_FPROPER_PAIR flag (i.e. flag 2) is set if the following function returns true:
inserts.h:218
bool happy() const
{
return num_mapped == 2 && opposite_strands && (num_spliced != 2 || consistent_splices) && !too_far;
}
This implements the function that produces the SAM MAPQ at varying number of mapping locations scores.c
#include <stdio.h>
#include <math.h>
int main(){
printf("num_hits\tmapQ\n");
for(int num_hits=1; num_hits<=10; num_hits++){
int mapQ = 50;
if (num_hits > 1) {
double err_prob = 1 - (1.0 / num_hits);
mapQ = (int)(-10.0 * log(err_prob) / log(10.0));
}
printf("%d\t%d\n", num_hits,mapQ);
}
return 0;
}
Output:
num_hits mapQ
1 50
2 3
3 1
4 1
5 0
6 0
7 0
8 0
9 0
10 0