Skip to content

Instantly share code, notes, and snippets.

@xydrolase
Created October 1, 2012 20:51
Show Gist options
  • Save xydrolase/3814322 to your computer and use it in GitHub Desktop.
Save xydrolase/3814322 to your computer and use it in GitHub Desktop.
inline kmer_pair_t *compute_delta(DOUBLE *ptr_ups_alpha, DOUBLE *norm_trans_p,
DOUBLE emit_dens, DOUBLE *delta, kmer_pair_t *kpair)
{
#ifdef __AVX__
/* yay! */
#endif
#ifdef __SSE3__
DOUBLE idx_argmax;
/* transition prob */
register __m128d x_tp = _mm_load_pd(norm_trans_p);
/* upstream alpha's */
register __m128d x_pa = _mm_load_pd(ptr_ups_alpha);
/* compute 1st part of recursion */
register __m128d x_delta = _mm_mul_pd(x_tp, x_pa);
register __m128d x_emit = _mm_set1_pd(emit_dens);
x_tp = _mm_load_pd(norm_trans_p + 2);
x_pa = _mm_load_pd(ptr_ups_alpha + 2);
/* compute 2nd part of recursion */
__m128d x_delta_tg = _mm_mul_pd(x_tp, x_pa);
/* find the two largest delta's */
register __m128d x_ord_mask = _mm_cmpord_pd(x_delta, x_delta_tg);
register __m128d x_max = _mm_add_pd(
_mm_and_pd(x_ord_mask, x_delta),
_mm_andnot_pd(x_ord_mask, x_delta_tg));
register __m128d x_argmax = _mm_add_pd(
_mm_and_pd(x_ord_mask, _mm_set_pd(1.0, 0.0)), /* A/C */
_mm_andnot_pd(x_ord_mask, _mm_set_pd(3.0, 2.0)));
register __m128d x_max_sr = (__m128d) _mm_srli_pd((__m128i) x_max, 8);
register __m128d x_argmax_sr = (__m128d) _mm_srli_pd(
(__m128i) x_argmax, 8);
x_ord_mask = _mm_cmpord_pd(
(__m128d) _mm_srli_si128((__m128i) x_max, 8), x_max);
x_max = _mm_add_pd(_mm_and_pd(x_ord_mask, x_max_sr),
_mm_andnot_pd(x_ord_mask, x_max));
x_argmax = _mm_add_pd(_mm_and_pd(x_ord_mask, x_argmax_sr),
_mm_andnot_pd(x_ord_mask, x_argmax));
_mm_storel_pd(delta, x_max);
_mm_storel_pd(&idx_argmax, x_argmax);
return (kmer_pair_t *) ((intptr_t) kpair + (int) idx_argmax);
#else
/* legacy code without speedup */
#endif
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment