Created
July 14, 2011 13:05
-
-
Save lindenb/1082406 to your computer and use it in GitHub Desktop.
plot SIFT vs Polyphen2 / amino acid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Author: | |
* Pierre Lindenbaum PhD | |
* Date: | |
* 2011-07-014 | |
* Contact: | |
* plindenbaum@yahoo.fr | |
* WWW: | |
* http://plindenbaum.blogspot.com | |
* Reference: | |
* http://plindenbaum.blogspot.com | |
* Motivation: | |
* plot SIFT vs Polyphen2 / amino acid | |
* Compile and exec: | |
* g++ -I /usr/include/cairo predictions.cpp -lcairo | |
* curl -s "http://dl.dropbox.com/u/17001647/dbNSFP/dbNSFP1.1.chr1-22XY.zip" | zcat |\ | |
* cut -d ' ' -f 5,6,19,20,21,22 | egrep '^[A-Z] [A-Z]'| ./a.out | |
* History: | |
* 2011-07-014 1st version | |
*/ | |
#include <cairo.h> | |
#include <cstdio> | |
#include <cctype> | |
#include <cassert> | |
using namespace std; | |
#define NUM_AA 20 | |
static const char allaminoacids[]="ARNDCQEGHILKMFPSTWYV"; | |
static const int blosum62[24][24]={ | |
{4,-1,-2,-2,0,-1,-1,0,-2,-1,-1,-1,-1,-2,-1,1,0,-3,-2,0,-2,-1,0,-4}, | |
{-1,5,0,-2,-3,1,0,-2,0,-3,-2,2,-1,-3,-2,-1,-1,-3,-2,-3,-1,0,-1,-4}, | |
{-2,0,6,1,-3,0,0,0,1,-3,-3,0,-2,-3,-2,1,0,-4,-2,-3,3,0,-1,-4}, | |
{-2,-2,1,6,-3,0,2,-1,-1,-3,-4,-1,-3,-3,-1,0,-1,-4,-3,-3,4,1,-1,-4}, | |
{0,-3,-3,-3,9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-3,-3,-2,-4}, | |
{-1,1,0,0,-3,5,2,-2,0,-3,-2,1,0,-3,-1,0,-1,-2,-1,-2,0,3,-1,-4}, | |
{-1,0,0,2,-4,2,5,-2,0,-3,-3,1,-2,-3,-1,0,-1,-3,-2,-2,1,4,-1,-4}, | |
{0,-2,0,-1,-3,-2,-2,6,-2,-4,-4,-2,-3,-3,-2,0,-2,-2,-3,-3,-1,-2,-1,-4}, | |
{-2,0,1,-1,-3,0,0,-2,8,-3,-3,-1,-2,-1,-2,-1,-2,-2,2,-3,0,0,-1,-4}, | |
{-1,-3,-3,-3,-1,-3,-3,-4,-3,4,2,-3,1,0,-3,-2,-1,-3,-1,3,-3,-3,-1,-4}, | |
{-1,-2,-3,-4,-1,-2,-3,-4,-3,2,4,-2,2,0,-3,-2,-1,-2,-1,1,-4,-3,-1,-4}, | |
{-1,2,0,-1,-3,1,1,-2,-1,-3,-2,5,-1,-3,-1,0,-1,-3,-2,-2,0,1,-1,-4}, | |
{-1,-1,-2,-3,-1,0,-2,-3,-2,1,2,-1,5,0,-2,-1,-1,-1,-1,1,-3,-1,-1,-4}, | |
{-2,-3,-3,-3,-2,-3,-3,-3,-1,0,0,-3,0,6,-4,-2,-2,1,3,-1,-3,-3,-1,-4}, | |
{-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4,7,-1,-1,-4,-3,-2,-2,-1,-2,-4}, | |
{1,-1,1,0,-1,0,0,0,-1,-2,-2,0,-1,-2,-1,4,1,-3,-2,-2,0,0,0,-4}, | |
{0,-1,0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1,1,5,-2,-2,0,-1,-1,0,-4}, | |
{-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1,1,-4,-3,-2,11,2,-3,-4,-3,-2,-4}, | |
{-2,-2,-2,-3,-2,-1,-2,-3,2,-1,-1,-2,-1,3,-3,-2,-2,2,7,-1,-3,-2,-1,-4}, | |
{0,-3,-3,-3,-1,-2,-2,-3,-3,3,1,-2,1,-1,-2,-2,0,-3,-1,4,-3,-2,-1,-4}, | |
{-2,-1,3,4,-3,0,1,-1,0,-3,-4,0,-3,-3,-2,0,-1,-4,-3,-3,4,1,-1,-4}, | |
{-1,0,0,1,-3,3,4,-2,0,-3,-3,1,-1,-3,-1,0,-1,-3,-2,-2,1,4,-1,-4}, | |
{0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2,0,0,-2,-1,-1,-1,-1,-1,-4}, | |
{-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1} | |
}; | |
static int aa2index(char aa) | |
{ | |
switch(toupper(aa)) | |
{ | |
case 'A' : return 0; | |
case 'R' : return 1; | |
case 'N' : return 2; | |
case 'D' : return 3; | |
case 'C' : return 4; | |
case 'Q' : return 5; | |
case 'E' : return 6; | |
case 'G' : return 7; | |
case 'H' : return 8; | |
case 'I' : return 9; | |
case 'L' : return 10; | |
case 'K' : return 11; | |
case 'M' : return 12; | |
case 'F' : return 13; | |
case 'P' : return 14; | |
case 'S' : return 15; | |
case 'T' : return 16; | |
case 'W' : return 17; | |
case 'Y' : return 18; | |
case 'V' : return 19; | |
//case 'B' : return 20; | |
//case 'Z' : return 21; | |
//case 'X' : return 22; | |
default: return -1; | |
} | |
} | |
static char index2aa(int index) | |
{ | |
return allaminoacids[index]; | |
} | |
/* | |
g++ -I /usr/include/cairo jeter.cpp -lcairo | |
curl -s "http://dl.dropbox.com/u/17001647/dbNSFP/dbNSFP1.1.chr1-22XY.zip" | funzip -t | cut -d ' ' -f 5,6,19,20,21,22 | egrep '^[A-Z] [A-Z]'| ./a.out */ | |
int main(int argc, char** argv) | |
{ | |
double alpha=0.1; | |
double min_blosum=1000; | |
double max_blosum=-1000; | |
for(int i=0;i<NUM_AA;++i) | |
{ | |
for(int j=0;j<NUM_AA;++j) | |
{ | |
double b=blosum62[i][j]; | |
min_blosum=(b< min_blosum ? b:min_blosum); | |
max_blosum=(b> max_blosum ? b:max_blosum); | |
} | |
} | |
const int margin=100; | |
const int font_size=margin-10; | |
const int aa_width=100; | |
cairo_surface_t *surface = cairo_image_surface_create (CAIRO_FORMAT_ARGB32,margin+ NUM_AA*aa_width, margin+ NUM_AA*aa_width); | |
if(surface==NULL) return -1; | |
cairo_t *cr = cairo_create (surface); | |
if(cr==NULL) return -1; | |
cairo_select_font_face (cr, "serif", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_BOLD); | |
cairo_set_font_size (cr, font_size); | |
cairo_set_line_width (cr, 1); | |
for(int i=0;i<NUM_AA;++i) | |
{ | |
double x=margin+i*aa_width; | |
for(int j=0;j<NUM_AA;++j) | |
{ | |
double b=blosum62[i][j]; | |
double y=margin+j*aa_width; | |
double gray= 1.0-((b-min_blosum)/(max_blosum-min_blosum))*0.4; | |
cairo_set_source_rgba (cr,gray,gray, gray, 1); | |
cairo_rectangle (cr, x, y, aa_width,aa_width); | |
cairo_fill (cr); | |
} | |
} | |
cairo_set_source_rgba (cr, 0.1, 0.1, 0.1, 1.0); | |
for(int i=0;i<= NUM_AA;++i) | |
{ | |
double n=i*aa_width; | |
char text[]={index2aa(i),'\0'}; | |
cairo_move_to (cr,margin-font_size,margin+n+font_size); | |
cairo_show_text (cr, text); | |
cairo_move_to (cr,margin+n,margin-1); | |
cairo_show_text (cr, text); | |
cairo_move_to (cr,margin,margin+n); | |
cairo_line_to (cr,(margin+ NUM_AA*aa_width),margin+n); | |
cairo_stroke (cr); | |
cairo_move_to (cr,margin+n,margin); | |
cairo_line_to (cr,margin+n,(margin+ NUM_AA*aa_width)); | |
cairo_stroke (cr); | |
} | |
cairo_set_source_rgba (cr, 0, 0, 0, 0.2); | |
cairo_set_line_width (cr, 1); | |
char line[BUFSIZ]; | |
while(fgets(line,BUFSIZ,stdin)!=NULL) | |
{ | |
char aa1,aa2,psift,ppph2; | |
float ssift,spph2; | |
if(sscanf(line,"%c %c %f %c %f %c",&aa1,&aa2,&ssift,&psift,&spph2,&ppph2)!=6) continue; | |
int indexaa1= aa2index(aa1); | |
if(aa1==-1) continue; | |
int indexaa2= aa2index(aa2); | |
if(aa2==-1) continue; | |
assert(ssift>=0 && ssift<=1.0); | |
assert(spph2>=0 && spph2<=1.0); | |
double x=margin+indexaa1*aa_width+(ssift*(aa_width-1)); | |
double y=margin+indexaa2*aa_width+aa_width-(spph2*(aa_width-1)); | |
//fprintf(stderr,"OK %f %f\n",x,y); | |
if(psift=='D' && (ppph2=='D' || ppph2=='P')) | |
{ | |
cairo_set_source_rgba (cr, 1, 0, 0, alpha); | |
} | |
else | |
{ | |
cairo_set_source_rgba (cr, 0, 0.5, 0,alpha); | |
} | |
cairo_move_to (cr,x-5,y); | |
cairo_line_to (cr,x+5,y); | |
cairo_stroke (cr); | |
cairo_move_to (cr,x,y-5); | |
cairo_line_to (cr,x,y+5); | |
cairo_stroke (cr); | |
} | |
cairo_destroy (cr); | |
cairo_surface_write_to_png (surface, "jeter.png"); | |
cairo_surface_destroy (surface); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment