Skip to content

Instantly share code, notes, and snippets.

@jxtx
Last active October 14, 2020 17:29
Show Gist options
  • Save jxtx/b1c170bc5a35487c35fb to your computer and use it in GitHub Desktop.
Save jxtx/b1c170bc5a35487c35fb to your computer and use it in GitHub Desktop.
Oldest nucleotide blast.c I can find...
/*
* BLAST - Search two DNA sequences for locally maximal segment pairs. The basic
* command syntax is
*
* BLAST sequence1 sequence2
*
* where sequence1 and sequence2 name files containing DNA sequences. Lines
* at the beginnings of the files that don't start with 'A', 'C', 'T' or 'G'
* are discarded. Thus a typical sequence file might begin:
*
* >BOVHBPP3I Bovine beta-globin psi-3 pseudogene, 5' end.
* GGAGAATAAAGTTTCTGAGTCTAGACACACTGGATCAGCCAATCACAGATGAAGGGCACT
* GAGGAACAGGAGTGCATCTTACATTCCCCCAAACCAATGAACTTGTATTATGCCCTGGGC
*
* If sequence1 and sequence2 are identical file names, then matches are
* computed without "mirror image" pairs and without the trivial long match.
*
* The BLAST command permits optional additional arguments, such as X=50,
* that reset certain internal parameters. The available parameters are:
*
* M or m gives the score for a match.
* I or i gives the score for a transition (A <--> G, C <--> T).
* V or v gives the score for a tranversion (non-transition change).
* W or w gives the word size.
* X or x gives the value for terminating word extensions.
* K or k give the cutoff.
* (defaults: M = 1, I = -1, V = -1, W = 8, X = 10*M,
* and K = 5% significance level)
*
* If W is set to 0, then all locally maximum segment pairs (LMSPs) are
* computed; in this case the value of X is irrelevant.
*
* In addition, the word "noalign" requests that no alignments be printed
* (summary statistics for each locally maximal segment pair are reported)
* and the word "stats" requests that statistics concerning the performance
* of BLAST be printed. Thus a typical command is
*
* BLAST SEQ1 SEQ2 M=1 I=0 V=-1 noalign
*/
#include <stdio.h>
#include <math.h>
#include <ctype.h>
#define max(x,y) ((x > y) ? (x) : (y))
#define min(x,y) ((x < y) ? (x) : (y))
#define SUBSTITUTION_SCORE(x,y) S[x][y]
#define DEFAULT_M 1 /* default score for match */
#define DEFAULT_I -1 /* default score for transition */
#define DEFAULT_V -1 /* default score for transversion */
#define DEFAULT_W 8 /* default word size */
#define DEFAULT_SIG 0.05 /* default significance level for setting K */
char
S[128][128],
*s1, *seq1, *s2, *seq2;
int
alignments,
print_stats,
K, /* cutoff score */
M, /* score for a match */
I, /* score for a transition */
V, /* score for a transversion */
W, /* word length */
X, /* cutoff for hit extensions */
sig99, sig90, sig50, /* number of MPSs above given significance */
numMSPs, /* total number of MSPs recorded */
numhits, /* number of word hits */
missW[15][3],
missX[25][3];
double
param_K, param_lambda;
long
*diag_lev, /* extent of discovered matches on a given diagonal */
len1, len2; /* sequence lengths */
typedef struct msp {
long len, pos1, pos2;
int score;
struct msp *next_msp;
} Msp, *Msp_ptr;
Msp_ptr msp_list, *msp;
main(argc, argv)
int argc;
char *argv[];
{
int i, v;
double x, log(), sqrt(), significance();
char *ckalloc();
if (argc < 3)
fatalf("%s seq1 seq2 [M=] [I=] [V=] [K=] [W=] [X=] [noalign] [stats]",
argv[0]);
M = DEFAULT_M;
I = DEFAULT_I;
V = DEFAULT_V;
W = DEFAULT_W;
X = -1;
alignments = 1;
print_stats = 0;
while (--argc > 2) {
if (strcmp(argv[argc],"noalign") == 0) {
alignments = 0;
continue;
}
if (strcmp(argv[argc],"stats") == 0) {
print_stats = 1;
continue;
}
if (argv[argc][1] != '=')
fatalf("argument %d has improper form", argc);
v = atoi(argv[argc] + 2);
switch (argv[argc][0]) {
case 'M':
case 'm':
M = v;
break;
case 'I':
case 'i':
I = v;
break;
case 'V':
case 'v':
V = v;
break;
case 'K':
case 'k':
K = v;
break;
case 'W':
case 'w':
W = v;
break;
case 'X':
case 'x':
X = v;
break;
default:
fatal("Options are M, I, V, K, W, X.");
}
}
if (X == -1)
X = 10*M;
if (V > I)
fatal("transversions score higher than transitions?");
len1 = get_seq(argv[1], &seq1);
if (strcmp(argv[1],argv[2]) == 0) {
if (W == 0)
fatal("file1 = file2 not implemented when W=0");
seq2 = seq1;
len2 = len1;
} else
len2 = get_seq(argv[2], &seq2);
s1 = seq1 - 1; /* so subscripts start with 1 */
s2 = seq2 - 1;
diag_lev = (long *)ckalloc((len1+len2+1)*sizeof(long)) + len1;
/* set up scoring matrix */
S['A']['A'] = S['C']['C'] = S['G']['G'] = S['T']['T'] = M;
S['A']['G'] = S['G']['A'] = S['C']['T'] = S['T']['C'] = I;
S['A']['C'] = S['A']['T'] = S['C']['A'] = S['C']['G'] =
S['G']['C'] = S['G']['T'] = S['T']['A'] = S['T']['G'] = V;
get_params();
if (K == 0) {
x = log(DEFAULT_SIG)/(-param_K*len1*len2);
K = 2*log(sqrt(x))/(-param_lambda);
while (significance(K-1) >= DEFAULT_SIG)
--K;
while (significance(K) < DEFAULT_SIG)
++K;
}
if (alignments) {
printf("#:lav\n\n");
printf("d {\n\"BLAST output with parameters:\n");
printf("M = %d, I = %d, V = %d\n", M, I, V);
if (W > 0)
printf("W = %d, X = %d\"", W, X);
else
printf("W = 0 (computes all LMSPs)\"");
printf("\n}\n");
printf("s {\n \"%s\" 1 %d\n \"%s\" 1 %d\n}\n",
argv[1], len1, argv[2], len2);
printf("k {\n \"significance\"\n}\n");
}
if (W > 0) {
bld_table();
search();
} else
get_msps();
sort_msps();
if (!alignments)
printf("\n\n Seq. 1 Seq. 2 len score signif W X\n");
/* print the matches for each sequence */
for (i = 0; i < numMSPs; ++i)
print_msp(i);
if (W == 0 && print_stats) {
printf("\n%3d MSPs above significance level 0.99\n", sig99);
printf("%3d MSPs above significance level 0.90\n", sig90);
printf("%3d MSPs above significance level 0.50\n", sig50);
printf("\nPercent of MSPs missed by BLAST at various");
printf(" W and significance levels:\n");
printf(" W: 0.99 0.90 0.50\n");
for (i = 4; i <= 12; ++i)
printf("%2d: %4.1f%% %4.1f%% %4.1f%%\n",
i,
100*missW[i][0]/((float)sig99),
100*missW[i][1]/((float)sig90),
100*missW[i][2]/((float)sig50));
printf("\nPercent of MSPs missed by BLAST at various");
printf(" X and significance levels:\n");
printf(" X: 0.99 0.90 0.50\n");
for (i = 1; i <= 20; ++i)
printf("%2d: %4.1f%% %4.1f%% %4.1f%%\n",
i,
100*missX[i][0]/((float)sig99),
100*missX[i][1]/((float)sig90),
100*missX[i][2]/((float)sig50));
}
if (W > 0 && print_stats) {
printf("\n\nStatistics:\n");
printf(" %s: M = %d, I = %d, V = %d, K = %d, W = %d, X = %d\n",
argv[0], M, I, V, K, W, X);
printf(" # of word hits = %d\n", numhits);
printf(" # of matches found = %d\n", numMSPs);
}
if (strcmp(argv[1], argv[2]) == 0) {
/* self-comparison; insert trivial diagonal */
printf("a {\n s 0.0\n b 1 1\n e %d %d\n", len1, len1);
printf(" l 1 1 %d %d 0.0\n}\n", len1, len1);
}
exit (0);
}
/* get_seq - read a sequence */
int get_seq(file_name, seqptr)
char *file_name, **seqptr;
{
FILE *qp, *ckopen();
char *p, *fgets(), *strchr(), *ckalloc();
int c;
long n;
qp = ckopen(file_name, "r");
for (n = 0; (c = getc(qp)) != EOF; )
if (c != '\n')
++n;
*seqptr = ckalloc(n+1);
rewind(qp);
if (n == 0)
return 0;
for (p = *seqptr; ; ) {
if (fgets(p, (int)n+1, qp) == NULL) {
fclose(qp);
*p = '\0';
break;
}
if (p == *seqptr && strchr("ACTG", *p) == NULL)
continue;
while (isupper(*p))
++p;
if (*p != '\n' && *p != '\0')
fatalf("Illegal character %c in query sequence.", *p);
}
return p - *seqptr;
}
/* ------------- build table of W-tuples in the first sequence ------------*/
/* The data structure could be simplified if we limited W to, say, W <= 8. */
struct hash_node {
long ecode; /* integer encoding of the word */
int pos; /* positions where word hits query sequence */
struct hash_node *link; /* next word with same last 7.5 letters */
};
#define HASH_SIZE 32767 /* 2**15 - 1 */
struct hash_node *hashtab[HASH_SIZE+1];
int encoding[128];
int mask;
long *next_pos;
bld_table()
{
long ecode;
int i;
char *q, *ckalloc();
/* perform initializations */
encoding['C'] = 1;
encoding['G'] = 2;
encoding['T'] = 3;
mask = (1 << (W+W-2)) - 1;
next_pos = (long *)ckalloc(len1*sizeof(long));
q = seq1 - 1;
ecode = 0L;
for (i = 1; i < W; ++i)
ecode = (ecode << 2) + encoding[*++q];
while (*++q) {
ecode = ((ecode & mask) << 2) + encoding[*q];
add_word(ecode, (long)(q - seq1 +1));
}
}
/* add_word - add a word to the table of critical words */
add_word(ecode, pos)
long ecode, pos;
{
struct hash_node *h;
long hval;
char *ckalloc();
hval = ecode & HASH_SIZE;
for (h = hashtab[hval]; h; h = h->link)
if (h->ecode == ecode)
break;
if (h == NULL) {
h = (struct hash_node *) ckalloc (sizeof(struct hash_node));
h->link = hashtab[hval];
hashtab[hval] = h;
h->ecode = ecode;
h->pos = -1;
}
next_pos[pos] = h->pos;
h->pos = pos;
}
/* ----------------------- search the second sequence --------------------*/
search()
{
register struct hash_node *h;
register char *s;
register long ecode, hval;
int i;
long p;
s = seq2 - 1;
ecode = 0L;
for (i = 1; i < W; ++i)
ecode = (ecode << 2) + encoding[*++s];
while (*++s) {
ecode = ((ecode & mask) << 2) + encoding[*s];
hval = ecode & HASH_SIZE;
for (h = hashtab[hval]; h; h = h->link)
if (h->ecode == ecode) {
for (p = h->pos; p >= 0; p = next_pos[p])
extend_hit(p, (long)(s+1-seq2));
break;
}
}
}
/* extend_hit - extend a word-sized hit to a longer match */
extend_hit(pos1, pos2)
long pos1, pos2;
{
char *beg2, *beg1, *end1, *q, *s;
long min_sum, max_sum, sum, diag, score;
Msp_ptr mp;
if (seq1 == seq2 && pos1 >= pos2)
return;
numhits++;
diag = pos2 - pos1;
if (diag_lev[diag] > pos1)
return;
/* extend to the right */
max_sum = sum = 0;
end1 = q = seq1 + pos1;
s = seq2 + pos2;
while (*s != '\0' && *q != '\0' && sum >= max_sum - X) {
sum += SUBSTITUTION_SCORE(*s++,*q++);
if (sum > max_sum) {
max_sum = sum;
end1 = q;
}
}
/* extend to the left */
min_sum = sum = 0;
beg1 = q = (seq1 + pos1) - W;
beg2 = s = seq2 + pos2 - W;
while (s > seq2 && q > seq1 && sum >= min_sum - X) {
sum += SUBSTITUTION_SCORE(*--s,*--q);
if (sum > min_sum) {
min_sum = sum;
beg2 = s;
beg1 = q;
}
}
score = M*W + max_sum + min_sum;
if (score >= K) {
++numMSPs;
mp = (Msp_ptr)ckalloc(sizeof(Msp));
mp->len = end1 - beg1;
mp->pos1 = beg1 - seq1;
mp->pos2 = beg2 - seq2;
mp->score = score;
mp->next_msp = msp_list;
msp_list = mp;
}
diag_lev[diag] = (end1 - seq1) + W;
}
/* ---------------- compute locally maximal sequence pairs ---------------*/
get_msps()
{
long d, i, score, best_score, end1, beg1, lower, upper;
for (d = 1-len1; d < len2; ++d) {
beg1 = best_score = score = 0;
lower = max(1, 1-d);
upper = min(len1, len2-d);
for (i = lower; i <= upper; ++i)
if (score > 0) {
score += SUBSTITUTION_SCORE(s1[i], s2[i+d]);
if (score > best_score) {
best_score = score;
end1 = i;
}
} else {
check_score(best_score, d, beg1, end1);
best_score = score =
SUBSTITUTION_SCORE(s1[i], s2[i+d]);
beg1 = end1 = i;
}
check_score(best_score, d, beg1, end1);
}
}
check_score(score, d, beg1, end1)
long score, d, beg1, end1;
{
char *ckalloc();
Msp_ptr mp;
if (score >= K) {
++numMSPs;
mp = (Msp_ptr)ckalloc(sizeof(Msp));
mp->len = end1 - beg1 + 1;
mp->pos1 = beg1 - 1;
mp->pos2 = beg1 + d - 1;
mp->score = score;
mp->next_msp = msp_list;
msp_list = mp;
}
}
/* sort_msps - order database sequence for printing */
sort_msps()
{
char *ckalloc();
int i;
Msp_ptr mp;
msp = (Msp_ptr *) ckalloc(numMSPs*sizeof(Msp_ptr));
for (i = 0, mp = msp_list; i < numMSPs; ++i, mp = mp->next_msp)
msp[i] = mp;
for (i = (numMSPs/2) - 1; i >= 0; --i)
heapify(i, (int) numMSPs-1);
for (i = numMSPs-1; i > 0; --i) {
mp = msp[0];
msp[0] = msp[i];
msp[i] = mp;
if (i > 1)
heapify(0, i-1);
}
}
/* heapify - core procedure for heapsort */
heapify(i, last)
int i, last;
{
int lim = (last-1)/2, left_son, small_son;
Msp_ptr mp;
while (i <= lim) {
left_son = 2*i + 1;
if (left_son == last)
small_son = left_son;
else
small_son = smaller(left_son, left_son+1);
if (smaller(i, small_son) == small_son) {
mp = msp[i];
msp[i] = msp[small_son];
msp[small_son] = mp;
i = small_son;
} else
break;
}
}
/* smaller - determine which of two sequences should be printed first */
int smaller(i, j)
int i, j;
{
Msp_ptr ki = msp[i], kj = msp[j];
if (ki->score == kj->score)
return ((ki->pos1 >= kj->pos1) ? i : j);
/* print one with larger score first */
return ((ki->score <= kj->score) ? i : j);
}
/* -------------------------- compute significance parameters ---------------*/
get_params()
{
double _p[1000], *p , log(), sqrt(), pMatch, pTransit, pTransver, N;
long A1, A2, C1, C2, G1, G2, T1, T2;
int c, i;
A1 = A2 = C1 = C2 = G1 = G2 = T1 = T2 = 0;
for (i = 0; i < len1; ++i)
if ((c = seq1[i]) == 'A')
++A1;
else if (c == 'C')
++C1;
else if (c == 'G')
++G1;
else if (c == 'T')
++T1;
for (i = 0; i < len2; ++i)
if ((c = seq2[i]) == 'A')
++A2;
else if (c == 'C')
++C2;
else if (c == 'G')
++G2;
else if (c == 'T')
++T2;
N = ((double)len1)*((double)len2);
pMatch = ((double)A1)*((double)A2)
+ ((double)C1)*((double)C2)
+ ((double)G1)*((double)G2)
+ ((double)T1)*((double)T2);
pTransit = ((double)A1)*((double)G2)
+ ((double)C1)*((double)T2)
+ ((double)G1)*((double)A2)
+ ((double)T1)*((double)C2);
pTransver = N - pMatch - pTransit;
for (i = 0; i < V+M; ++i)
_p[i] = 0.0;
p = _p - V;
p[V] = pTransver/N;
p[I] += (pTransit/N);
p[M] += (pMatch/N);
if (karlin(V, M, _p, &param_lambda, &param_K) == 0)
fatal("compution of significance levels failed");
}
/**************** Statistical Significance Parameter Subroutine ****************
Version 1.0 February 2, 1990
Version 1.1 July 5, 1990
Program by: Stephen Altschul
Address: National Center for Biotechnology Information
National Library of Medicine
National Institutes of Health
Bethesda, MD 20894
Internet: altschul@ncbi.nlm.nih.gov
See: Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical
Significance of Molecular Sequence Features by Using General Scoring
Schemes," Proc. Natl. Acad. Sci. USA 87 (1990), 2264-2268.
Computes the parameters lambda and K for use in calculating the
statistical significance of high-scoring segments or subalignments.
The scoring scheme must be integer valued. A positive score must be
possible, but the expected (mean) score must be negative.
A program that calls this routine must provide the value of the lowest
possible score, the value of the greatest possible score, and a pointer
to an array of probabilities for the occurence of all scores between
these two extreme scores. For example, if score -2 occurs with
probability 0.7, score 0 occurs with probability 0.1, and score 3
occurs with probability 0.2, then the subroutine must be called with
low = -2, high = 3, and pr pointing to the array of values
{ 0.7, 0.0, 0.1, 0.0, 0.0, 0.2 }. The calling program must also provide
pointers to lambda and K; the subroutine will then calculate the values
of these two parameters. In this example, lambda=0.330 and K=0.154.
The parameters lambda and K can be used as follows. Suppose we are
given a length N random sequence of independent letters. Associated
with each letter is a score, and the probabilities of the letters
determine the probability for each score. Let S be the aggregate score
of the highest scoring contiguous segment of this sequence. Then if N
is sufficiently large (greater than 100), the following bound on the
probability that S is greater than or equal to x applies:
P( S >= x ) <= 1 - exp [ - KN exp ( - lambda * x ) ].
In other words, the p-value for this segment can be written as
1-exp[-KN*exp(-lambda*S)].
This formula can be applied to pairwise sequence comparison by assigning
scores to pairs of letters (e.g. amino acids), and by replacing N in the
formula with N*M, where N and M are the lengths of the two sequences
being compared.
In addition, letting y = KN*exp(-lambda*S), the p-value for finding m
distinct segments all with score >= S is given by:
2 m-1 -y
1 - [ 1 + y + y /2! + ... + y /(m-1)! ] e
Notice that for m=1 this formula reduces to 1-exp(-y), which is the same
as the previous formula.
*******************************************************************************/
#define MAXIT 150 /* Maximum number of iterations used in calculating K */
karlin(low,high,pr,lambda,K)
int low; /* Lowest score (must be negative) */
int high; /* Highest score (must be positive) */
double *pr; /* Probabilities for various scores */
double *lambda; /* Pointer to parameter lambda */
double *K; /* Pointer to parmeter K */
{
int i,j,range,lo,hi,first,last;
double up,new,Sum,av;
register double sum;
double *p,*P,*ptrP,exp();
register double *ptr1,*ptr2,*ptr1e;
char *calloc();
/* Check that scores and their associated probabilities are valid */
if (low>=0) {
fprintf(stderr,"Lowest score must be negative.\n");
return 0;
}
for (i=range=high-low;i> -low && !pr[i];--i);
if (i<= -low) {
fprintf(stderr,"A positive score must be possible.\n");
return 0;
}
for (sum=i=0;i<=range;sum+=pr[i++]) if (pr[i]<0) {
fprintf(stderr,"Negative probabilities not allowed.\n");
return 0;
}
if (sum<0.99995 || sum>1.00005) fprintf(stderr,"Probabilities sum to %.4f. Normalizing.\n",sum);
p= (double *) calloc(range+1,sizeof(double));
for (Sum=low,i=0;i<=range;++i) Sum+=i*(p[i]=pr[i]/sum);
if (Sum>=0) {
fprintf(stderr,"Invalid (non-negative) expected score: %.3f\n",Sum);
free(p);
return 0;
}
/* Calculate the parameter lambda */
up=0.5;
do {
up*=2;
ptr1=p;
for (sum=0,i=low;i<=high;++i) sum+= *ptr1++ * exp(up*i);
}
while (sum<1.0);
for (*lambda=j=0;j<25;++j) {
new=(*lambda+up)/2.0;
ptr1=p;
for (sum=0,i=low;i<=high;++i) sum+= *ptr1++ * exp(new*i);
if (sum>1.0) up=new;
else *lambda=new;
}
/* Calculate the pamameter K */
ptr1=p;
for (av=0,i=low;i<=high;++i) av+= *ptr1++ *i*exp(*lambda*i);
if (low == -1 || high == 1) {
*K = high==1 ? av : Sum*Sum/av;
*K *= 1.0 - exp(- *lambda);
free(p);
return 1; /* Parameters calculated successfully */
}
Sum=lo=hi=0;
P= (double *) calloc(MAXIT*range+1,sizeof(double));
for (*P=sum=j=1;j<=MAXIT && sum>0.00001;Sum+=sum/=j++) {
first=last=range;
for (ptrP=P+(hi+=high)-(lo+=low);ptrP>=P;*ptrP-- =sum) {
ptr1=ptrP-first;
ptr1e=ptrP-last;
ptr2=p+first;
for (sum=0;ptr1>=ptr1e;) sum+= *ptr1-- * *ptr2++;
if (first) --first;
if (ptrP-P<=range) --last;
}
for (sum=0,i=lo;i;++i) sum+= *++ptrP * exp(*lambda*i);
for (;i<=hi;++i) sum+= *++ptrP;
}
if (j>MAXIT) fprintf(stderr,"Value for K may be too large due to insufficient iterations.\n");
for (i=low;!p[i-low];++i);
for (j= -i;i<high && j>1;) if (p[++i-low]) j=gcd(j,i);
*K = (j*exp(-2*Sum))/(av*(1.0-exp(- *lambda*j)));
free(p);
free(P);
return 1; /* Parameters calculated successfully */
}
gcd(a,b)
int a,b;
{
int c;
if (b<0) b= -b;
if (b>a) { c=a; a=b; b=c; }
for (;b;b=c) { c=a%b; a=b; }
return a;
}
/* ---------------------------------------------------------------------------*/
/* print_msp - display the i-th MSP */
print_msp(i)
int i;
{
long len, pos1, pos2;
Msp_ptr mp = msp[i];
int j, start_j, W, X, sc;
double prob, significance();
pos1 = mp->pos1;
pos2 = mp->pos2;
len = mp->len;
prob = significance(mp->score);
if (!alignments || print_stats) {
if (prob > .99)
++sig99;
if (prob > .90)
++sig90;
if (prob > .50)
++sig50;
for (W = start_j = j = 0; j < len; ++j)
if (seq1[pos1+j] != seq2[pos2+j]) {
W = max(W, j-start_j);
start_j = j+1;
}
W = max(W, len-start_j);
for (j = 4; j <= 12; ++j) {
if (prob > .99 && W < j)
++missW[j][0];
if (prob > .90 && W < j)
++missW[j][1];
if (prob > .50 && W < j)
++missW[j][2];
}
for (sc = X = j = 0; j < len; ++j)
if (sc < 0) {
sc += SUBSTITUTION_SCORE(seq1[pos1+j], seq2[pos2+j]);
if (sc < X)
X = sc;
} else
sc = SUBSTITUTION_SCORE(seq1[pos1+j], seq2[pos2+j]);
X = -X;
for (j = 1; j <= 20; ++j) {
if (prob > .99 && X > j)
++missX[j][0];
if (prob > .90 && X > j)
++missX[j][1];
if (prob > .50 && X > j)
++missX[j][2];
}
}
if (!alignments) {
printf("%4d: %6d %7d %5d %6d %5.1lf%% %4d %4d\n",
i+1, pos1+1, pos2+1, len, mp->score, 100*prob, W, X);
return;
}
printf("a {\n s %d\n b %d %d\n e %d %d\n l %d %d %d %d %1.1lf\n}\n",
mp->score, pos1+1, pos2+1, pos1+len, pos2+len,
pos1+1, pos2+1, pos1+len, pos2+len, 100*prob);
}
double significance(score)
int score;
{
double y, exp(), N;
N = ((double)len1)*((double)len2);
y = -param_lambda*(score);
y = param_K*N*exp(y);
return exp(-y);
}
/* ---------------------------- utility routines -------------------------*/
/* fatal - print message and die */
fatal(msg)
char *msg;
{
fprintf(stderr, "%s\n", msg);
exit(1);
}
/* fatalf - format message, print it, and die */
fatalf(msg, val)
char *msg, *val;
{
fprintf(stderr, msg, val);
putc('\n', stderr);
exit(1);
}
/* ckopen - open file; check for success */
FILE *ckopen(name, mode)
char *name, *mode;
{
FILE *fopen(), *fp;
if ((fp = fopen(name, mode)) == NULL)
fatalf("Cannot open %s.", name);
return(fp);
}
/* ckalloc - allocate space; check for success */
char *ckalloc(amount)
int amount;
{
char *malloc(), *p;
if ((p = malloc( (unsigned) amount)) == NULL)
fatal("Ran out of memory.");
return(p);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment