|
#include <pthread.h> |
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
|
|
#define KMER "CGTCCCTGT" // 286 hits in soldata.txt |
|
#define KMER_SIZE 9 |
|
|
|
#define NUM_THREADS 6 |
|
#define N_READS_TO_LOAD 1000000 |
|
#define READ_SIZE 36 // 35 + \n |
|
|
|
// This is the code for the worker |
|
// In this case, just count how many times you see a kmer |
|
void *look(int n_reads, int tid, char **reads, int *t_count) |
|
{ |
|
char save; |
|
int i,j; |
|
for (i=0; i!=n_reads; ++i) { |
|
#ifdef HAVE_PTHREAD |
|
if (i % NUM_THREADS != tid) continue; |
|
#endif |
|
for (j=0; j<READ_SIZE-KMER_SIZE; ++j) { |
|
save = reads[i][j+KMER_SIZE]; |
|
reads[i][j+KMER_SIZE] = '\0'; |
|
if (strcmp(KMER, &reads[i][j]) == 0) *t_count += 1; |
|
reads[i][j+KMER_SIZE] = save; |
|
} |
|
} |
|
return 0; |
|
} |
|
|
|
/* Load data into buffer for thread t (if there is more data in file) */ |
|
int load_to_buffer(char **b) |
|
{ |
|
int nrr = 0; // number of reads read |
|
size_t linecap = 0; |
|
int bytes_read = 0; |
|
while (nrr < N_READS_TO_LOAD && |
|
(bytes_read = getline(&b[nrr], &linecap, stdin)) > 0) { |
|
if (bytes_read == -1) { |
|
fprintf(stderr, "ERROR: reading data from input stream. (nrr=%d)\n", nrr); |
|
exit(1); |
|
} |
|
else |
|
nrr++; |
|
} |
|
return nrr; |
|
} |
|
|
|
#ifdef HAVE_PTHREAD |
|
typedef struct { |
|
int tid; |
|
char **reads; |
|
int count; |
|
int n_reads; |
|
} thread_data; |
|
|
|
static void *worker(void *data) |
|
{ |
|
thread_data *d = (thread_data*) data; |
|
look(d->n_reads, d->tid, d->reads, &d->count); |
|
return 0; |
|
} |
|
#endif |
|
|
|
int main(int argc, char *argv[]) |
|
{ |
|
char **buffer; |
|
{ // request mem for buffer of reads |
|
int j; |
|
buffer = (char **) calloc(sizeof(char *), N_READS_TO_LOAD); |
|
for (j=0; j<N_READS_TO_LOAD; ++j) |
|
buffer[j] = (char *) calloc(sizeof(char), READ_SIZE); |
|
} |
|
|
|
int total = 0; // total number of hits seen for kmer |
|
int n_rr; // n_rr: number of reads read |
|
int total_n_reads = 0; // total number of reads processed |
|
while((n_rr = load_to_buffer(buffer)) != 0) { |
|
total_n_reads += n_rr; |
|
#ifdef HAVE_PTHREAD |
|
if (NUM_THREADS == 1) { |
|
int i_total = 0; |
|
look(n_rr, 0, buffer, &i_total); |
|
total += i_total; |
|
} |
|
else { |
|
pthread_t *tid; |
|
pthread_attr_t attr; |
|
thread_data *data; |
|
int j; |
|
pthread_attr_init(&attr); |
|
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); |
|
data = (thread_data*) calloc(NUM_THREADS, sizeof(thread_data)); |
|
tid = (pthread_t*) calloc(NUM_THREADS , sizeof(pthread_t)); |
|
for (j=0; j<NUM_THREADS; ++j) { |
|
data[j].n_reads = n_rr; |
|
data[j].tid = j; |
|
data[j].reads = buffer; |
|
data[j].count = 0; |
|
pthread_create(&tid[j], &attr, worker, data+j); |
|
} |
|
// Now run them |
|
for (j=0; j<NUM_THREADS; ++j) pthread_join(tid[j], 0); |
|
for (j=0; j<NUM_THREADS; ++j) total += data[j].count; // save hits |
|
free(data); free(tid); |
|
} |
|
#else |
|
look(n_rr, 0, buffer, &total); |
|
#endif |
|
fprintf(stderr, "> # of lines processed: %d\n", total_n_reads); |
|
} |
|
|
|
printf("# lines processed: %d, hits: %d\n", total_n_reads, total); // Dump # of times we have seen the kmer in input stream. |
|
|
|
{ // Release mem |
|
int j; |
|
for (j=0; j<N_READS_TO_LOAD; ++j) |
|
free(buffer[j]); |
|
free(buffer); |
|
} |
|
|
|
return 0; |
|
} |
|
|
Hello drio, I'm testing your code but it's throwing the following error, /root/Dropbox/git_repo/rGenotype/data/soldata.txt: file or directory not found, could you upload soldata.txt?. Regards.