Skip to content

Instantly share code, notes, and snippets.

@mr-eyes
Created May 12, 2020 15:39
Show Gist options
  • Save mr-eyes/a21f32cf5f622bb21c63f25178e7b688 to your computer and use it in GitHub Desktop.
Save mr-eyes/a21f32cf5f622bb21c63f25178e7b688 to your computer and use it in GitHub Desktop.
Kseq Paired End Fastx reader in chunks
// to compile: gcc this_prog.c -lz
#include <zlib.h>
#include <cstdio>
#include "kseq.h"
#include <iostream>
#include <vector>
KSEQ_INIT(gzFile, gzread)
using namespace std;
class kseqReader {
private:
bool END = false;
string R1_fastx, R2_fastx;
int chunk_size;
gzFile fp_r1, fp_r2;
kseq_t *seq_1, *seq_2;
public:
struct peRead {
string R1_seq;
string R2_seq;
string R1_name;
string R2_name;
};
std::vector<peRead> chunk_reads;
kseqReader(string R1File, string R2File, int chunk_size) {
this->chunk_size = chunk_size;
this->R1_fastx = R1File;
this->R2_fastx = R2File;
fp_r1 = gzopen(R1File.c_str(), "r");
fp_r2 = gzopen(R2File.c_str(), "r");
seq_1 = kseq_init(fp_r1);
seq_2 = kseq_init(fp_r2);
};
std::vector<peRead> *next_chunk() {
this->chunk_reads.clear();
for (int i = 0; i < this->chunk_size && ((kseq_read(seq_1)) >= 0 && (kseq_read(seq_2)) >= 0); i++) {
this->chunk_reads.emplace_back(peRead{seq_1->seq.s, seq_2->seq.s, seq_1->name.s, seq_2->name.s});
}
if ((int) this->chunk_reads.size() != this->chunk_size) {
this->END = true;
}
return &this->chunk_reads;
}
[[nodiscard]] bool end() const {
return this->END;
}
~kseqReader() {
kseq_destroy(this->seq_1);
kseq_destroy(this->seq_2);
gzclose(this->fp_r1);
gzclose(this->fp_r2);
}
};
int main(int argc, char *argv[]) {
if (argc < 4) {
fprintf(stderr, "Usage: %s <in_1.fasta> <in_2.fasta> <chunk_size>\n", argv[0]);
return 1;
}
string R1_file = argv[1];
string R2_file = argv[2];
int chunkSize = stoi(argv[3]);
auto *PEReader = new kseqReader(R1_file, R2_file, chunkSize);
while (!PEReader->end()) {
for (auto &PE : *PEReader->next_chunk()) {
cout << PE.R1_name << endl;
cout << PE.R1_seq << endl;
cout << endl;
}
}
delete PEReader;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment