Skip to content

Instantly share code, notes, and snippets.

@lh3
Created July 6, 2017 14:22
Show Gist options
  • Save lh3/0e2866e788ff4f8f1199bee6fb2fb8b9 to your computer and use it in GitHub Desktop.
Save lh3/0e2866e788ff4f8f1199bee6fb2fb8b9 to your computer and use it in GitHub Desktop.
#include <unistd.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "kstring.h"
#include "sam.h"
#include "kbtree.h"
typedef struct {
int pos, n, sum;
} endpos_t;
#define endpos_cmp(a, b) (((b).pos < (a).pos) - ((a).pos < (b).pos))
KBTREE_INIT(e, endpos_t, endpos_cmp) // sorted by the end position
int main(int argc, char *argv[])
{
samFile *in;
bam_hdr_t *h;
bam1_t *b;
kbtree_t(e) *t;
int c, is_sam = 0, max_len = 1000, min_short = 5;
long *dist_len, sum_short = 0, n_short = 0;
while ((c = getopt(argc, argv, "Sl:n:")) >= 0) {
if (c == 'S') is_sam = 1;
else if (c == 'l') max_len = atoi(optarg);
else if (c == 'n') min_short = atoi(optarg);
}
if (argc == optind) {
fprintf(stderr, "Usage: fraglen [-S] [-l %d] [-n %d] <in.bam> | <in.sam>\n", max_len, min_short);
return 1;
}
dist_len = (long*)calloc(max_len + 1, sizeof(long));
in = sam_open(argv[optind], is_sam? "r" : "rb", 0);
h = sam_hdr_read(in);
t = kb_init(e, KB_DEFAULT_SIZE);
b = bam_init1();
while (sam_read1(in, h, b) >= 0) {
const bam1_core_t *c = &b->core;
endpos_t end, *p;
if (c->tid < 0 || (c->flag&BAM_FUNMAP) || c->n_cigar <= 0 || c->isize <= 0) continue;
if (c->isize >= max_len) continue; // ignore excessive long fragments
//printf("%d\t%d\t%d\t%ld\t%f\n", c->tid, c->pos, kb_size(t), n_short, n_short > 0? (float)sum_short / n_short : -1);
while (kb_size(t) > 0) { // deplete fragments ending before c->pos
kbitr_t itr;
kb_itr_first(e, t, &itr);
end = kb_itr_key(endpos_t, &itr);
if (end.pos > c->pos) break;
sum_short -= end.sum;
n_short -= end.n;
kb_delp(e, t, &end);
if (n_short >= min_short)
++dist_len[(int)(((float)sum_short / n_short + .499))];
}
assert(n_short >= 0);
end.pos = c->pos + c->isize, end.n = 1, end.sum = c->isize;
++n_short, sum_short += c->isize;
p = kb_getp(e, t, &end);
if (p) p->n += end.n, p->sum += end.sum;
else kb_putp(e, t, &end);
}
bam_destroy1(b);
kb_destroy(int, t);
bam_hdr_destroy(h);
sam_close(in);
for (c = 0; c <= max_len; ++c)
printf("%d\t%ld\n", c, dist_len[c]);
free(dist_len);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment