Created
January 30, 2014 19:39
-
-
Save standage/8717134 to your computer and use it in GitHub Desktop.
Working through issues with GenomeTools feature streams and memory mangement.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##gff-version 3 | |
##sequence-region seq01 1 900 | |
##sequence-region seq02 1 900 | |
##sequence-region seq03 1 900 | |
##sequence-region seq04 1 900 | |
##sequence-region seq05 1 900 | |
##sequence-region seq06 1 900 | |
##sequence-region seq07 1 2000 | |
##sequence-region seq08 1 2000 | |
##sequence-region seq09 1 2000 | |
##sequence-region seq10 1 2000 | |
##sequence-region seq11 1 2000 | |
##sequence-region seq12 1 2000 | |
##sequence-region seq13 1 1500 | |
##sequence-region seq14 1 1500 | |
##sequence-region seq15 1 1500 | |
##sequence-region seq16 1 1500 | |
##sequence-region seq17 1 1500 | |
##sequence-region seq18 1 1500 | |
##sequence-region seq19 1 1500 | |
##sequence-region seq20 1 1001 | |
##sequence-region seq21 1 1000 | |
##sequence-region seq22 1 999 | |
##sequence-region seq23 1 801 | |
##sequence-region seq24 1 800 | |
##sequence-region seq25 1 799 | |
# | |
seq01 nano mRNA 400 600 . + . ID=test1.1 | |
seq02 nano mRNA 401 600 . + . ID=test1.2 | |
seq03 nano mRNA 402 600 . + . ID=test1.3 | |
seq04 nano mRNA 200 600 . + . ID=test1.4 | |
seq05 nano mRNA 201 600 . + . ID=test1.5 | |
seq06 nano mRNA 202 600 . + . ID=test1.6 | |
# | |
seq07 nano mRNA 400 600 . + . ID=test2.1a | |
seq07 nano mRNA 1202 1400 . + . ID=test2.1b | |
seq08 nano mRNA 400 600 . + . ID=test2.2a | |
seq08 nano mRNA 1201 1400 . + . ID=test2.2b | |
seq09 nano mRNA 400 600 . + . ID=test2.3a | |
seq09 nano mRNA 1200 1400 . + . ID=test2.3b | |
# | |
seq10 nano mRNA 400 600 . + . ID=test3.1a | |
seq10 nano mRNA 1002 1300 . + . ID=test3.1b | |
seq11 nano mRNA 400 600 . + . ID=test3.2a | |
seq11 nano mRNA 1001 1300 . + . ID=test3.2b | |
seq12 nano mRNA 400 600 . + . ID=test3.3a | |
seq12 nano mRNA 1000 1300 . + . ID=test3.3b | |
# | |
seq13 nano mRNA 400 600 . + . ID=test4.1a | |
seq13 nano mRNA 803 1100 . + . ID=test4.1b | |
seq14 nano mRNA 400 600 . + . ID=test4.2a | |
seq14 nano mRNA 802 1100 . + . ID=test4.2b | |
seq15 nano mRNA 400 600 . + . ID=test4.3a | |
seq15 nano mRNA 801 1100 . + . ID=test4.3b | |
seq16 nano mRNA 400 600 . + . ID=test4.4a | |
seq16 nano mRNA 800 1100 . + . ID=test4.4b | |
# | |
seq17 nano mRNA 400 600 . + . ID=test5.1a | |
seq17 nano mRNA 606 1000 . + . ID=test5.1b | |
seq18 nano mRNA 400 600 . + . ID=test5.2a | |
seq18 nano mRNA 602 1000 . + . ID=test5.2b | |
seq19 nano mRNA 400 600 . + . ID=test5.3a | |
seq19 nano mRNA 601 1000 . + . ID=test5.3b | |
# | |
seq20 nano mRNA 400 600 . + . ID=test6.1 | |
seq21 nano mRNA 400 600 . + . ID=test6.2 | |
seq22 nano mRNA 400 600 . + . ID=test6.3 | |
seq23 nano mRNA 400 600 . + . ID=test6.4 | |
seq24 nano mRNA 400 600 . + . ID=test6.5 | |
seq25 nano mRNA 400 600 . + . ID=test6.6 | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "genometools.h" | |
// gcc -Wall -g -I /usr/local/include/genometools/ -DWITHOUT_CAIRO -o streamtest streamtest.c -lgenometools | |
// valgrind ./streamtest streamtest-data.gff3 25 > test.txt 2> test.log | |
int main(int argc, const char **argv) | |
{ | |
const char *infile; | |
GtError *error; | |
GtFeatureIndex *fi1, *fi2, *fi3; | |
GtNodeStream *gff3, *s1, *s2, *s3; | |
GtStrArray *seqids; | |
GtUword i, numseqs; | |
gt_lib_init(); | |
gt_assert(argc > 2); | |
infile = argv[1]; | |
numseqs = atol(argv[2]); | |
error = gt_error_new(); | |
gff3 = gt_gff3_in_stream_new_unsorted(1, &infile); | |
gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3); | |
gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3); | |
fi1 = gt_feature_index_memory_new(); | |
s1 = gt_feature_out_stream_new(gff3, fi1); | |
gt_node_stream_pull(s1, error); | |
if(gt_error_is_set(error)) | |
fprintf(stderr, "error processing node stream: %s\n", gt_error_get(error)); | |
gt_node_stream_delete(gff3); | |
gt_node_stream_delete(s1); | |
fi2 = gt_feature_index_memory_new(); | |
seqids = gt_feature_index_get_seqids(fi1, error); | |
gt_assert(gt_str_array_size(seqids) == numseqs); | |
for(i = 0; i < numseqs; i++) | |
{ | |
const char *seqid = gt_str_array_get(seqids, i); | |
GtStr *seqidstr = gt_str_new_cstr(seqid); | |
GtRange seqrange; | |
gt_feature_index_get_orig_range_for_seqid(fi1, &seqrange, seqid, error); | |
GtGenomeNode *region = gt_region_node_new(seqidstr, seqrange.start, seqrange.end); | |
GtRegionNode *rn = gt_region_node_cast(region); | |
gt_feature_index_add_region_node(fi2, rn, error); | |
gt_genome_node_delete(region); | |
gt_str_delete(seqidstr); | |
GtArray *seqfeats = gt_feature_index_get_features_for_seqid(fi1, seqid, error); | |
while(gt_array_size(seqfeats) > 0) | |
{ | |
GtGenomeNode **gn = gt_array_pop(seqfeats); | |
GtFeatureNode *fn = gt_feature_node_cast(*gn); | |
GtRange range = gt_genome_node_get_range(*gn); | |
gt_feature_index_add_feature_node(fi2, fn, error); | |
printf("(fi1) %s: %s[%lu, %lu]\n", gt_feature_node_get_type(fn), seqid, range.start, range.end); | |
} | |
gt_array_delete(seqfeats); | |
} | |
gt_str_array_delete(seqids); | |
fi3 = gt_feature_index_memory_new(); | |
s2 = gt_feature_in_stream_new(fi2); | |
s3 = gt_feature_out_stream_new(s2, fi3); | |
gt_node_stream_pull(s3, error); | |
if(gt_error_is_set(error)) | |
fprintf(stderr, "error processing node stream: %s\n", gt_error_get(error)); | |
seqids = gt_feature_index_get_seqids(fi3, error); | |
gt_assert(gt_str_array_size(seqids) == numseqs); | |
for(i = 0; i < numseqs; i++) | |
{ | |
const char *seqid = gt_str_array_get(seqids, i); | |
GtArray *seqfeats = gt_feature_index_get_features_for_seqid(fi3, seqid, error); | |
while(gt_array_size(seqfeats) > 0) | |
{ | |
GtGenomeNode **gn = gt_array_pop(seqfeats); | |
GtFeatureNode *fn = gt_feature_node_cast(*gn); | |
GtRange range = gt_genome_node_get_range(*gn); | |
printf("(fi2) %s: %s[%lu, %lu]\n", gt_feature_node_get_type(fn), seqid, range.start, range.end); | |
} | |
gt_array_delete(seqfeats); | |
} | |
gt_str_array_delete(seqids); | |
gt_feature_index_delete(fi1); | |
gt_feature_index_delete(fi2); | |
gt_feature_index_delete(fi3); | |
gt_node_stream_delete(s2); | |
gt_node_stream_delete(s3); | |
gt_error_delete(error); | |
gt_lib_clean(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment