This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) { | |
const uint8x16_t bitmask1 = { 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, | |
0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10}; | |
const uint8x16_t bitmask2 = { 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, | |
0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20}; | |
const uint8x16_t bitmask3 = { 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, | |
0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40}; | |
const uint8x16_t bitmask4 = { 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, | |
0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) { | |
const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, | |
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; | |
uint8x16_t t0 = vandq_u8(p0, bitmask); | |
uint8x16_t t1 = vandq_u8(p1, bitmask); | |
uint8x16_t t2 = vandq_u8(p2, bitmask); | |
uint8x16_t t3 = vandq_u8(p3, bitmask); | |
uint8x16_t sum0 = vpaddq_u8(t0, t1); | |
uint8x16_t sum1 = vpaddq_u8(t2, t3); | |
sum0 = vpaddq_u8(sum0, sum1); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//////////////////////////////////////////////////////////////////////////////////////////// | |
// Step 2: detect insides of quote pairs | |
//////////////////////////////////////////////////////////////////////////////////////////// | |
u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); | |
quote_bits = quote_bits & ~odd_ends; | |
dumpbits(quote_bits, "quote_bits"); | |
// pdep pattern is alternating 0 and 1 bits, starting with 0 or 1 depending on whether | |
// we're in a quote-pair from the previous iteration | |
u64 pdep_pattern = even_bits ^ prev_iter_inside_quote; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
really_inline uint64_t find_quote_mask_and_bits( | |
__m256i input_lo, __m256i input_hi, uint64_t odd_ends, | |
uint64_t &prev_iter_inside_quote, uint64_t &quote_bits) { | |
quote_bits = | |
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); | |
quote_bits = quote_bits & ~odd_ends; | |
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( | |
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); | |
quote_mask ^= prev_iter_inside_quote; | |
// right shift of a signed value expected to be well-defined and standard |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <typename T> | |
void match_multiple_smh(T & smh, std::vector<u8 *> & buffers, std::vector<size_t> & lengths, | |
std::vector<u32> & results) { | |
u32 i = 0; | |
#ifndef NO_UNROLL | |
for (; i+7 < buffers.size(); i+=8) { | |
results[i+0] = smh.match(buffers[i+0], lengths[i+0]); LFENCE | |
results[i+1] = smh.match(buffers[i+1], lengths[i+1]); LFENCE | |
results[i+2] = smh.match(buffers[i+2], lengths[i+2]); LFENCE | |
results[i+3] = smh.match(buffers[i+3], lengths[i+3]); LFENCE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <typename T> | |
void match_multiple_smh_latency_test(T & smh, std::vector<u8 *> & buffers, std::vector<size_t> & lengths, | |
std::vector<u32> & results) { | |
u32 i = 0; | |
u32 tmp = 0; | |
#ifndef NO_UNROLL | |
// NOTE: experimental code only. Note that the addition of 'tmp' - being the id of a possible | |
// match - could take us RIGHT outside our buffer if we actually matched something. We aren't | |
// in this particular run, but so it goes. Saner would be to build up an all-zero id vector | |
for (; i+7 < buffers.size(); i+=8) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
109 111 117 115:101 0 0 0| 0 0 0 0: 0 0 0 0|109 111 117 115:101 0 0 0| 0 0 0 0: 0 0 0 0| input | |
0 1 2 3: 4 128 0 1| 2 3 4 128: 0 1 2 128| 0 1 2 128:128 128 128 128|128 128 128 128:128 128 128 128| shuf_mask | |
109 111 117 115:101 0 109 111|117 115 101 0:109 111 117 0|109 111 117 0: 0 0 0 0| 0 0 0 0: 0 0 0 0| shuf result | |
109 111 111 115:101 255 109 111|117 115 101 255: 99 97 116 255|100 111 103 255:255 255 255 255|255 255 255 255:255 255 255 255| cmp_mask | |
255 255 0 255:255 0 255 255|255 255 255 0: 0 0 0 0| 0 255 0 0: 0 0 0 0| 0 0 0 0: 0 0 0 0| cmp result | |
11_11_11111______1______________________________________________ input to gpr-smh | |
_____1_____1___1___1____________________________________________ hi | |
1_____1_____1___1_______________________________________________ low | |
__111______11___11______________________________________________ after_add |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct SIMD_SMH_PART { | |
m256 shuf_mask; | |
m256 cmp_mask; | |
m256 and_mask; // not yet used | |
m256 sub_mask; // not yet used | |
// Shuffle the bytes of `d` according to shuf_mask, compare the shuffled
// bytes for equality against cmp_mask, and return the comparison outcome
// packed into a bitmask with one bit per byte lane.
u32 doit(m256 d) {
    const auto shuffled = _mm256_shuffle_epi8(d, shuf_mask);
    const auto equal_lanes = _mm256_cmpeq_epi8(shuffled, cmp_mask);
    return _mm256_movemask_epi8(equal_lanes);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ sudo nice --20 taskset -c 1 ./sheng | |
Sheng | |
0/1 1/1 2/1 3/1 4/1 5/1 6/1 7/1 8/1 9/1 | |
10/1 11/1 12/1 13/1 14/1 15/1 16/1 17/1 18/1 19/1 | |
20/1 21/1 22/1 23/1 24/1 25/2 26/3 27/4 28/5 29/5 | |
30/5 31/5 32/5 33/5 34/5 35/5 36/5 37/5 38/5 39/5 | |
40/5 41/5 42/5 43/5 44/5 45/5 46/5 47/5 48/5 49/5 | |
50/5 51/5 52/5 53/5 54/5 55/5 56/5 57/5 58/5 59/5 | |
60/6 61/7 62/8 63/9 64/10 65/1 66/1 67/1 68/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct Sheng { | |
typedef m128 State; | |
m128 transitions[256]; | |
State start_state; | |
Sheng(std::vector<std::tuple<u32, u32, u8>> & trans_vec, u8 start_state_, u8 default_state) { | |
// fill all transitions with default state | |
for (u32 i = 0; i < 256; ++i) { | |
transitions[i] = _mm_set1_epi8(default_state); | |
} |
NewerOlder