-
-
Save njbooher/000d9b57b4efff644199 to your computer and use it in GitHub Desktop.
ScoreBindingSites with shared memory and loading multiple chars at a time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// chosen using occupancy spreadsheet | |
#define SCORE_THREADS_PER_BLOCK 448 | |
__device__ double ScoringMatrixVal(double *scoring_matrix, size_t pitch, unsigned int row, unsigned int column) { | |
return *((double*)((char*) scoring_matrix + row * pitch) + column); | |
} | |
__global__ void ScoreBindingSites(char *input_sequence, unsigned long is_length, unsigned int *rvd_sequence, unsigned int rs_len, double cutoff, unsigned int rvd_num, double *scoring_matrix, size_t sm_pitch, unsigned char *results) { | |
__shared__ char shared_seq[512]; | |
int block_seq_index = SCORE_THREADS_PER_BLOCK * (blockIdx.y * gridDim.x + blockIdx.x); | |
int thread_id = (blockDim.x * threadIdx.y) + threadIdx.x; | |
int seq_index = block_seq_index + thread_id; | |
int shared_seq_index = thread_id + 1; | |
char4 *shared_vector = (char4 *) shared_seq; | |
if (threadIdx.y < 4) { | |
// if (block_seq_index + (4 * thread_id) < is_length) | |
// shared_seq[4 * thread_id] = input_sequence[block_seq_index + 4 * thread_id]; | |
// if (block_seq_index + (4 * thread_id + 1) < is_length) | |
// shared_seq[4 * thread_id + 1] = input_sequence[block_seq_index + 4 * thread_id + 1]; | |
// if (block_seq_index + (4 * thread_id + 2) < is_length) | |
// shared_seq[4 * thread_id + 2] = input_sequence[block_seq_index + 4 * thread_id + 2]; | |
// if (block_seq_index + (4 * thread_id + 3) < is_length) | |
// shared_seq[4 * thread_id + 3] = input_sequence[block_seq_index + 4 * thread_id + 3]; | |
if (block_seq_index + (4 * (thread_id + 1)) <= is_length) { | |
shared_vector[thread_id] = *(char4 *)(input_sequence + block_seq_index + (4 * thread_id)); | |
} else { | |
if (block_seq_index + (4 * thread_id) < is_length) | |
shared_seq[4 * thread_id] = input_sequence[block_seq_index + 4 * thread_id]; | |
if (block_seq_index + (4 * thread_id + 1) < is_length) | |
shared_seq[4 * thread_id + 1] = input_sequence[block_seq_index + 4 * thread_id + 1]; | |
if (block_seq_index + (4 * thread_id + 2) < is_length) | |
shared_seq[4 * thread_id + 2] = input_sequence[block_seq_index + 4 * thread_id + 2]; | |
if (block_seq_index + (4 * thread_id + 3) < is_length) | |
shared_seq[4 * thread_id + 3] = input_sequence[block_seq_index + 4 * thread_id + 3]; | |
} | |
} | |
__syncthreads(); | |
if (seq_index < 1 || seq_index >= is_length || seq_index + rs_len >= is_length - 1) return; | |
char first = shared_seq[shared_seq_index - 1]; | |
char last = shared_seq[shared_seq_index + rs_len]; | |
bool first_t = first == 'T' || first == 't'; | |
bool last_a = last == 'A' || last == 'a'; | |
if (first_t || last_a) { | |
double thread_result_t = 0; | |
double thread_result_a = 0; | |
for (int i = 0; i < rs_len; i++) { | |
int sm_col_t = 4; | |
char base = shared_seq[shared_seq_index + i]; | |
if (base == 'A' || base == 'a') | |
sm_col_t = 0; | |
if (base == 'C' || base == 'c') | |
sm_col_t = 1; | |
if (base == 'G' || base == 'g') | |
sm_col_t = 2; | |
if (base == 'T' || base == 't') | |
sm_col_t = 3; | |
int rvd_index_t = i; | |
int rvd_index_a = rs_len - i - 1; | |
thread_result_t += ScoringMatrixVal(scoring_matrix, sm_pitch, rvd_sequence[rvd_index_t], sm_col_t); | |
int sm_col_a = (sm_col_t == 4 ? 4 : 3 - sm_col_t); | |
thread_result_a += ScoringMatrixVal(scoring_matrix, sm_pitch, rvd_sequence[rvd_index_a], sm_col_a); | |
} | |
if (first_t) | |
results[seq_index] |= (thread_result_t < cutoff ? 1UL : 0UL) << (2 * rvd_num); | |
if (last_a) | |
results[seq_index] |= (thread_result_a < cutoff ? 1UL : 0UL) << (2 * rvd_num + 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment