Skip to content

Instantly share code, notes, and snippets.

@standage
Created January 30, 2014 19:39
Show Gist options
  • Save standage/8717134 to your computer and use it in GitHub Desktop.
Save standage/8717134 to your computer and use it in GitHub Desktop.
Working through issues with GenomeTools feature streams and memory mangement.
##gff-version 3
##sequence-region seq01 1 900
##sequence-region seq02 1 900
##sequence-region seq03 1 900
##sequence-region seq04 1 900
##sequence-region seq05 1 900
##sequence-region seq06 1 900
##sequence-region seq07 1 2000
##sequence-region seq08 1 2000
##sequence-region seq09 1 2000
##sequence-region seq10 1 2000
##sequence-region seq11 1 2000
##sequence-region seq12 1 2000
##sequence-region seq13 1 1500
##sequence-region seq14 1 1500
##sequence-region seq15 1 1500
##sequence-region seq16 1 1500
##sequence-region seq17 1 1500
##sequence-region seq18 1 1500
##sequence-region seq19 1 1500
##sequence-region seq20 1 1001
##sequence-region seq21 1 1000
##sequence-region seq22 1 999
##sequence-region seq23 1 801
##sequence-region seq24 1 800
##sequence-region seq25 1 799
#
seq01 nano mRNA 400 600 . + . ID=test1.1
seq02 nano mRNA 401 600 . + . ID=test1.2
seq03 nano mRNA 402 600 . + . ID=test1.3
seq04 nano mRNA 200 600 . + . ID=test1.4
seq05 nano mRNA 201 600 . + . ID=test1.5
seq06 nano mRNA 202 600 . + . ID=test1.6
#
seq07 nano mRNA 400 600 . + . ID=test2.1a
seq07 nano mRNA 1202 1400 . + . ID=test2.1b
seq08 nano mRNA 400 600 . + . ID=test2.2a
seq08 nano mRNA 1201 1400 . + . ID=test2.2b
seq09 nano mRNA 400 600 . + . ID=test2.3a
seq09 nano mRNA 1200 1400 . + . ID=test2.3b
#
seq10 nano mRNA 400 600 . + . ID=test3.1a
seq10 nano mRNA 1002 1300 . + . ID=test3.1b
seq11 nano mRNA 400 600 . + . ID=test3.2a
seq11 nano mRNA 1001 1300 . + . ID=test3.2b
seq12 nano mRNA 400 600 . + . ID=test3.3a
seq12 nano mRNA 1000 1300 . + . ID=test3.3b
#
seq13 nano mRNA 400 600 . + . ID=test4.1a
seq13 nano mRNA 803 1100 . + . ID=test4.1b
seq14 nano mRNA 400 600 . + . ID=test4.2a
seq14 nano mRNA 802 1100 . + . ID=test4.2b
seq15 nano mRNA 400 600 . + . ID=test4.3a
seq15 nano mRNA 801 1100 . + . ID=test4.3b
seq16 nano mRNA 400 600 . + . ID=test4.4a
seq16 nano mRNA 800 1100 . + . ID=test4.4b
#
seq17 nano mRNA 400 600 . + . ID=test5.1a
seq17 nano mRNA 606 1000 . + . ID=test5.1b
seq18 nano mRNA 400 600 . + . ID=test5.2a
seq18 nano mRNA 602 1000 . + . ID=test5.2b
seq19 nano mRNA 400 600 . + . ID=test5.3a
seq19 nano mRNA 601 1000 . + . ID=test5.3b
#
seq20 nano mRNA 400 600 . + . ID=test6.1
seq21 nano mRNA 400 600 . + . ID=test6.2
seq22 nano mRNA 400 600 . + . ID=test6.3
seq23 nano mRNA 400 600 . + . ID=test6.4
seq24 nano mRNA 400 600 . + . ID=test6.5
seq25 nano mRNA 400 600 . + . ID=test6.6
#
#include "genometools.h"
// gcc -Wall -g -I /usr/local/include/genometools/ -DWITHOUT_CAIRO -o streamtest streamtest.c -lgenometools
// valgrind ./streamtest streamtest-data.gff3 25 > test.txt 2> test.log
int main(int argc, const char **argv)
{
const char *infile;
GtError *error;
GtFeatureIndex *fi1, *fi2, *fi3;
GtNodeStream *gff3, *s1, *s2, *s3;
GtStrArray *seqids;
GtUword i, numseqs;
gt_lib_init();
gt_assert(argc > 2);
infile = argv[1];
numseqs = atol(argv[2]);
error = gt_error_new();
gff3 = gt_gff3_in_stream_new_unsorted(1, &infile);
gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3);
gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3);
fi1 = gt_feature_index_memory_new();
s1 = gt_feature_out_stream_new(gff3, fi1);
gt_node_stream_pull(s1, error);
if(gt_error_is_set(error))
fprintf(stderr, "error processing node stream: %s\n", gt_error_get(error));
gt_node_stream_delete(gff3);
gt_node_stream_delete(s1);
fi2 = gt_feature_index_memory_new();
seqids = gt_feature_index_get_seqids(fi1, error);
gt_assert(gt_str_array_size(seqids) == numseqs);
for(i = 0; i < numseqs; i++)
{
const char *seqid = gt_str_array_get(seqids, i);
GtStr *seqidstr = gt_str_new_cstr(seqid);
GtRange seqrange;
gt_feature_index_get_orig_range_for_seqid(fi1, &seqrange, seqid, error);
GtGenomeNode *region = gt_region_node_new(seqidstr, seqrange.start, seqrange.end);
GtRegionNode *rn = gt_region_node_cast(region);
gt_feature_index_add_region_node(fi2, rn, error);
gt_genome_node_delete(region);
gt_str_delete(seqidstr);
GtArray *seqfeats = gt_feature_index_get_features_for_seqid(fi1, seqid, error);
while(gt_array_size(seqfeats) > 0)
{
GtGenomeNode **gn = gt_array_pop(seqfeats);
GtFeatureNode *fn = gt_feature_node_cast(*gn);
GtRange range = gt_genome_node_get_range(*gn);
gt_feature_index_add_feature_node(fi2, fn, error);
printf("(fi1) %s: %s[%lu, %lu]\n", gt_feature_node_get_type(fn), seqid, range.start, range.end);
}
gt_array_delete(seqfeats);
}
gt_str_array_delete(seqids);
fi3 = gt_feature_index_memory_new();
s2 = gt_feature_in_stream_new(fi2);
s3 = gt_feature_out_stream_new(s2, fi3);
gt_node_stream_pull(s3, error);
if(gt_error_is_set(error))
fprintf(stderr, "error processing node stream: %s\n", gt_error_get(error));
seqids = gt_feature_index_get_seqids(fi3, error);
gt_assert(gt_str_array_size(seqids) == numseqs);
for(i = 0; i < numseqs; i++)
{
const char *seqid = gt_str_array_get(seqids, i);
GtArray *seqfeats = gt_feature_index_get_features_for_seqid(fi3, seqid, error);
while(gt_array_size(seqfeats) > 0)
{
GtGenomeNode **gn = gt_array_pop(seqfeats);
GtFeatureNode *fn = gt_feature_node_cast(*gn);
GtRange range = gt_genome_node_get_range(*gn);
printf("(fi2) %s: %s[%lu, %lu]\n", gt_feature_node_get_type(fn), seqid, range.start, range.end);
}
gt_array_delete(seqfeats);
}
gt_str_array_delete(seqids);
gt_feature_index_delete(fi1);
gt_feature_index_delete(fi2);
gt_feature_index_delete(fi3);
gt_node_stream_delete(s2);
gt_node_stream_delete(s3);
gt_error_delete(error);
gt_lib_clean();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment