Last active
December 15, 2015 08:29
-
-
Save hohoCode/5231592 to your computer and use it in GitHub Desktop.
SSE Code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Intersect two index ranges of sentence_num and record the matching entries.
 *
 * The "short" range is [short_start, short_end] and the "long" range is
 * [long_start, long_end] (both inclusive), each addressing sentence_num and
 * bit_index in parallel. For every value present in both ranges the two
 * bit_index words are combined as (first >> 1) & second; when that result is
 * non-zero the pair (value, combined bits) is appended to res_sentence_num /
 * res_bit_index at position *globalIndexStart, which is then incremented.
 *
 * Phase 1 compares complete blocks of four ints with SSE2 (all-pairs equality
 * within a block pair). Phase 2 is a scalar merge over whatever is left once
 * one of the lists has run out of complete blocks.
 *
 * The merge logic (advance the block with the smaller maximum, break on
 * item < sentence_num[j]) is only correct if both ranges are sorted in
 * ascending order -- presumably guaranteed by the caller; TODO confirm.
 *
 * @param sentence_num     values to intersect (read only)
 * @param bit_index        per-entry bit words, parallel to sentence_num (read only)
 * @param shortlen         number of entries in the short range; presumably
 *                         short_end - short_start + 1 -- verify against caller
 * @param short_start      first index of the short range
 * @param short_end        last index (inclusive) of the short range
 * @param longlen          number of entries in the long range; presumably
 *                         long_end - long_start + 1 -- verify against caller
 * @param long_start       first index of the long range
 * @param long_end         last index (inclusive) of the long range
 * @param distance         only its sign is used, and only in the scalar phase,
 *                         to pick which side's bit word is shifted
 * @param res_sentence_num output array for matched values; no bounds check is
 *                         performed here, the caller must size it
 * @param res_bit_index    output array for combined bit words, parallel to
 *                         res_sentence_num
 * @param globalIndexStart in/out write cursor into the two output arrays
 * @return item_bound with low = *globalIndexStart on entry and
 *         up = *globalIndexStart on exit
 */
inline item_bound sequential_SSE(int *sentence_num, unsigned long *bit_index, int shortlen, int short_start, int short_end, int longlen, int long_start, int long_end, int distance, int *res_sentence_num, unsigned long *res_bit_index, int *globalIndexStart)
{
    int sind = short_start;
    int lind = long_start;

    // Index of the first element of the LAST complete 4-int block of each
    // list. Negative (before offsetting) means the list has fewer than 4 ints.
    int shortLenReg = (shortlen / 4) * 4 - 4;
    int longLenReg = (longlen / 4) * 4 - 4;

    // The vectorized phase needs at least one complete block on both sides.
    const bool flager = (shortLenReg >= 0 && longLenReg >= 0);
    if (flager) {
        shortLenReg += short_start;
        longLenReg += long_start;
    }

    unsigned long result_bit_index = 0;
    int item = 0;

    item_bound resultBoundary;
    resultBoundary.low = *globalIndexStart;
    resultBoundary.up = *globalIndexStart;

    if (flager) {
        // Unaligned loads: sind/lind start at arbitrary caller-supplied
        // offsets, so 16-byte alignment cannot be assumed.
        __m128i short_a = _mm_loadu_si128((const __m128i *) &sentence_num[sind]);
        __m128i long_b = _mm_loadu_si128((const __m128i *) &sentence_num[lind]);
        // Largest element of each current block is its last lane; read it
        // straight from memory instead of extracting it from the register.
        int a_max = sentence_num[sind + 3];
        int b_max = sentence_num[lind + 3];

        do {
            // Broadcast each lane of the long block and compare it against
            // all four lanes of the short block (4x4 all-pairs equality).
            const __m128i cmp_mask1 = _mm_cmpeq_epi32(short_a, _mm_shuffle_epi32(long_b, _MM_SHUFFLE(0, 0, 0, 0)));
            const __m128i cmp_mask2 = _mm_cmpeq_epi32(short_a, _mm_shuffle_epi32(long_b, _MM_SHUFFLE(1, 1, 1, 1)));
            const __m128i cmp_mask3 = _mm_cmpeq_epi32(short_a, _mm_shuffle_epi32(long_b, _MM_SHUFFLE(2, 2, 2, 2)));
            const __m128i cmp_mask4 = _mm_cmpeq_epi32(short_a, _mm_shuffle_epi32(long_b, _MM_SHUFFLE(3, 3, 3, 3)));
            const __m128i cmp_mask = _mm_or_si128(_mm_or_si128(cmp_mask1, cmp_mask2), _mm_or_si128(cmp_mask3, cmp_mask4));

            // Cheap early-out: most block pairs share no value at all.
            if (_mm_movemask_ps(_mm_castsi128_ps(cmp_mask)) != 0) {
                // mask[c] has bit p set when short lane p equals long lane c.
                int mask[4];
                mask[0] = _mm_movemask_ps(_mm_castsi128_ps(cmp_mask1));
                mask[1] = _mm_movemask_ps(_mm_castsi128_ps(cmp_mask2));
                mask[2] = _mm_movemask_ps(_mm_castsi128_ps(cmp_mask3));
                mask[3] = _mm_movemask_ps(_mm_castsi128_ps(cmp_mask4));

                for (int ccc = 0; ccc < 4; ccc++) {
                    if (mask[ccc] == 0) {
                        continue;
                    }
                    for (int bitpos = 0; bitpos < 4; ++bitpos) {
                        if (((mask[ccc] >> bitpos) & 1) == 0) {
                            continue;
                        }
                        // Short side is shifted, long side is not.
                        result_bit_index = (bit_index[sind + bitpos] >> 1) & bit_index[lind + ccc];
                        if (result_bit_index != 0) {
                            // Same value as lane bitpos of short_a; a memory
                            // read avoids an extract with a runtime lane index.
                            item = sentence_num[sind + bitpos];
                            res_bit_index[*globalIndexStart] = result_bit_index;
                            res_sentence_num[*globalIndexStart] = item;
                            (*globalIndexStart)++;
                        }
                    }
                }
            }

            // Advance whichever block has the smaller maximum. Only load the
            // next block if it is a complete one; otherwise the loop is about
            // to terminate and the load could run past the end of the range.
            if (a_max <= b_max) {
                sind += 4;
                if (sind <= shortLenReg) {
                    short_a = _mm_loadu_si128((const __m128i *) &sentence_num[sind]);
                    a_max = sentence_num[sind + 3];
                }
            } else {
                lind += 4;
                if (lind <= longLenReg) {
                    long_b = _mm_loadu_si128((const __m128i *) &sentence_num[lind]);
                    b_max = sentence_num[lind + 3];
                }
            }
        } while (sind <= shortLenReg && lind <= longLenReg);
    }

    // Scalar phase: merge the list that ran out of complete blocks (outer)
    // against the remainder of the other list (inner).
    int outer_starter = 0;
    int outer_ender = 0;
    int prev = 0;
    int inner_ender = 0;
    if (flager && lind == longLenReg + 4) {
        // Long list exhausted its blocks: iterate its tail, scan the short list.
        outer_starter = lind;
        outer_ender = long_end;
        prev = sind;
        inner_ender = short_end;
        // Flip the sign so the short side is still the one shifted below.
        // NOTE(review): the vectorized phase ignores distance entirely, so a
        // negative caller-supplied distance orients the scalar phase the other
        // way round -- confirm this is intended.
        distance = -distance;
    } else if (!flager || sind == shortLenReg + 4) {
        // No vectorized phase ran, or the short list exhausted its blocks.
        outer_starter = sind;
        outer_ender = short_end;
        prev = lind;
        inner_ender = long_end;
    } else {
        // The block loop only ends when one cursor steps exactly one block
        // past its last complete block, so this is an invariant violation.
        cerr << "Can this be possible?!" << endl;
        exit(1);
    }

    for (int i = outer_starter; i <= outer_ender; i++) {
        item = sentence_num[i];
        for (int j = prev; j <= inner_ender; j++) {
            // Remember the scan position so the next outer item resumes here.
            prev = j;
            if (item < sentence_num[j]) {
                break;
            }
            if (item == sentence_num[j]) {
                if (distance < 0) {
                    result_bit_index = (bit_index[j] >> 1) & bit_index[i];
                } else {
                    result_bit_index = (bit_index[i] >> 1) & bit_index[j];
                }
                if (result_bit_index != 0) {
                    res_bit_index[*globalIndexStart] = result_bit_index;
                    res_sentence_num[*globalIndexStart] = item;
                    (*globalIndexStart)++;
                }
                break;
            }
        }
    }

    resultBoundary.up = *globalIndexStart;
    return resultBoundary;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment