Skip to content

Instantly share code, notes, and snippets.

@njbooher
Created July 9, 2012 23:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save njbooher/000d9b57b4efff644199 to your computer and use it in GitHub Desktop.
Save njbooher/000d9b57b4efff644199 to your computer and use it in GitHub Desktop.
ScoreBindingSites with shared memory and loading multiple chars at a time
// chosen using occupancy spreadsheet
#define SCORE_THREADS_PER_BLOCK 448
__device__ double ScoringMatrixVal(double *scoring_matrix, size_t pitch, unsigned int row, unsigned int column) {
return *((double*)((char*) scoring_matrix + row * pitch) + column);
}
__global__ void ScoreBindingSites(char *input_sequence, unsigned long is_length, unsigned int *rvd_sequence, unsigned int rs_len, double cutoff, unsigned int rvd_num, double *scoring_matrix, size_t sm_pitch, unsigned char *results) {
__shared__ char shared_seq[512];
int block_seq_index = SCORE_THREADS_PER_BLOCK * (blockIdx.y * gridDim.x + blockIdx.x);
int thread_id = (blockDim.x * threadIdx.y) + threadIdx.x;
int seq_index = block_seq_index + thread_id;
int shared_seq_index = thread_id + 1;
char4 *shared_vector = (char4 *) shared_seq;
if (threadIdx.y < 4) {
// if (block_seq_index + (4 * thread_id) < is_length)
// shared_seq[4 * thread_id] = input_sequence[block_seq_index + 4 * thread_id];
// if (block_seq_index + (4 * thread_id + 1) < is_length)
// shared_seq[4 * thread_id + 1] = input_sequence[block_seq_index + 4 * thread_id + 1];
// if (block_seq_index + (4 * thread_id + 2) < is_length)
// shared_seq[4 * thread_id + 2] = input_sequence[block_seq_index + 4 * thread_id + 2];
// if (block_seq_index + (4 * thread_id + 3) < is_length)
// shared_seq[4 * thread_id + 3] = input_sequence[block_seq_index + 4 * thread_id + 3];
if (block_seq_index + (4 * (thread_id + 1)) <= is_length) {
shared_vector[thread_id] = *(char4 *)(input_sequence + block_seq_index + (4 * thread_id));
} else {
if (block_seq_index + (4 * thread_id) < is_length)
shared_seq[4 * thread_id] = input_sequence[block_seq_index + 4 * thread_id];
if (block_seq_index + (4 * thread_id + 1) < is_length)
shared_seq[4 * thread_id + 1] = input_sequence[block_seq_index + 4 * thread_id + 1];
if (block_seq_index + (4 * thread_id + 2) < is_length)
shared_seq[4 * thread_id + 2] = input_sequence[block_seq_index + 4 * thread_id + 2];
if (block_seq_index + (4 * thread_id + 3) < is_length)
shared_seq[4 * thread_id + 3] = input_sequence[block_seq_index + 4 * thread_id + 3];
}
}
__syncthreads();
if (seq_index < 1 || seq_index >= is_length || seq_index + rs_len >= is_length - 1) return;
char first = shared_seq[shared_seq_index - 1];
char last = shared_seq[shared_seq_index + rs_len];
bool first_t = first == 'T' || first == 't';
bool last_a = last == 'A' || last == 'a';
if (first_t || last_a) {
double thread_result_t = 0;
double thread_result_a = 0;
for (int i = 0; i < rs_len; i++) {
int sm_col_t = 4;
char base = shared_seq[shared_seq_index + i];
if (base == 'A' || base == 'a')
sm_col_t = 0;
if (base == 'C' || base == 'c')
sm_col_t = 1;
if (base == 'G' || base == 'g')
sm_col_t = 2;
if (base == 'T' || base == 't')
sm_col_t = 3;
int rvd_index_t = i;
int rvd_index_a = rs_len - i - 1;
thread_result_t += ScoringMatrixVal(scoring_matrix, sm_pitch, rvd_sequence[rvd_index_t], sm_col_t);
int sm_col_a = (sm_col_t == 4 ? 4 : 3 - sm_col_t);
thread_result_a += ScoringMatrixVal(scoring_matrix, sm_pitch, rvd_sequence[rvd_index_a], sm_col_a);
}
if (first_t)
results[seq_index] |= (thread_result_t < cutoff ? 1UL : 0UL) << (2 * rvd_num);
if (last_a)
results[seq_index] |= (thread_result_a < cutoff ? 1UL : 0UL) << (2 * rvd_num + 1);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment