Created
October 1, 2012 20:51
-
-
Save xydrolase/3814322 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
inline kmer_pair_t *compute_delta(DOUBLE *ptr_ups_alpha, DOUBLE *norm_trans_p, | |
DOUBLE emit_dens, DOUBLE *delta, kmer_pair_t *kpair) | |
{ | |
#ifdef __AVX__ | |
/* yay! */ | |
#endif | |
#ifdef __SSE3__ | |
DOUBLE idx_argmax; | |
/* transition prob */ | |
register __m128d x_tp = _mm_load_pd(norm_trans_p); | |
/* upstream alpha's */ | |
register __m128d x_pa = _mm_load_pd(ptr_ups_alpha); | |
/* compute 1st part of recursion */ | |
register __m128d x_delta = _mm_mul_pd(x_tp, x_pa); | |
register __m128d x_emit = _mm_set1_pd(emit_dens); | |
x_tp = _mm_load_pd(norm_trans_p + 2); | |
x_pa = _mm_load_pd(ptr_ups_alpha + 2); | |
/* compute 2nd part of recursion */ | |
__m128d x_delta_tg = _mm_mul_pd(x_tp, x_pa); | |
/* find the two largest delta's */ | |
register __m128d x_ord_mask = _mm_cmpord_pd(x_delta, x_delta_tg); | |
register __m128d x_max = _mm_add_pd( | |
_mm_and_pd(x_ord_mask, x_delta), | |
_mm_andnot_pd(x_ord_mask, x_delta_tg)); | |
register __m128d x_argmax = _mm_add_pd( | |
_mm_and_pd(x_ord_mask, _mm_set_pd(1.0, 0.0)), /* A/C */ | |
_mm_andnot_pd(x_ord_mask, _mm_set_pd(3.0, 2.0))); | |
register __m128d x_max_sr = (__m128d) _mm_srli_pd((__m128i) x_max, 8); | |
register __m128d x_argmax_sr = (__m128d) _mm_srli_pd( | |
(__m128i) x_argmax, 8); | |
x_ord_mask = _mm_cmpord_pd( | |
(__m128d) _mm_srli_si128((__m128i) x_max, 8), x_max); | |
x_max = _mm_add_pd(_mm_and_pd(x_ord_mask, x_max_sr), | |
_mm_andnot_pd(x_ord_mask, x_max)); | |
x_argmax = _mm_add_pd(_mm_and_pd(x_ord_mask, x_argmax_sr), | |
_mm_andnot_pd(x_ord_mask, x_argmax)); | |
_mm_storel_pd(delta, x_max); | |
_mm_storel_pd(&idx_argmax, x_argmax); | |
return (kmer_pair_t *) ((intptr_t) kpair + (int) idx_argmax); | |
#else | |
/* legacy code without speedup */ | |
#endif | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment