View neon-pmovmskb-interleaved
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) { | |
const uint8x16_t bitmask1 = { 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, | |
0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10}; | |
const uint8x16_t bitmask2 = { 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, | |
0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20}; | |
const uint8x16_t bitmask3 = { 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, | |
0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40}; | |
const uint8x16_t bitmask4 = { 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, | |
0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80}; |
View neon-pmovmskb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) { | |
const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, | |
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; | |
uint8x16_t t0 = vandq_u8(p0, bitmask); | |
uint8x16_t t1 = vandq_u8(p1, bitmask); | |
uint8x16_t t2 = vandq_u8(p2, bitmask); | |
uint8x16_t t3 = vandq_u8(p3, bitmask); | |
uint8x16_t sum0 = vpaddq_u8(t0, t1); | |
uint8x16_t sum1 = vpaddq_u8(t2, t3); | |
sum0 = vpaddq_u8(sum0, sum1); |
View old-quote-detection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//////////////////////////////////////////////////////////////////////////////////////////// | |
// Step 2: detect insides of quote pairs | |
//////////////////////////////////////////////////////////////////////////////////////////// | |
u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); | |
quote_bits = quote_bits & ~odd_ends; | |
dumpbits(quote_bits, "quote_bits"); | |
// pdep pattern is alternating 0 and 1 bits, starting with 0 or 1 depending on whether | |
// we're in a quote-pair from the previous iteration | |
u64 pdep_pattern = even_bits ^ prev_iter_inside_quote; |
View quotes-fragment.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
really_inline uint64_t find_quote_mask_and_bits( | |
__m256i input_lo, __m256i input_hi, uint64_t odd_ends, | |
uint64_t &prev_iter_inside_quote, uint64_t &quote_bits) { | |
quote_bits = | |
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"')); | |
quote_bits = quote_bits & ~odd_ends; | |
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( | |
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); | |
quote_mask ^= prev_iter_inside_quote; | |
// right shift of a signed value expected to be well-defined and standard |
View smh_throughput.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <typename T> | |
void match_multiple_smh(T & smh, std::vector<u8 *> & buffers, std::vector<size_t> & lengths, | |
std::vector<u32> & results) { | |
u32 i = 0; | |
#ifndef NO_UNROLL | |
for (; i+7 < buffers.size(); i+=8) { | |
results[i+0] = smh.match(buffers[i+0], lengths[i+0]); LFENCE | |
results[i+1] = smh.match(buffers[i+1], lengths[i+1]); LFENCE | |
results[i+2] = smh.match(buffers[i+2], lengths[i+2]); LFENCE | |
results[i+3] = smh.match(buffers[i+3], lengths[i+3]); LFENCE |
View smh_latency.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <typename T> | |
void match_multiple_smh_latency_test(T & smh, std::vector<u8 *> & buffers, std::vector<size_t> & lengths, | |
std::vector<u32> & results) { | |
u32 i = 0; | |
u32 tmp = 0; | |
#ifndef NO_UNROLL | |
// NOTE: experimental code only. Note that the addition of 'tmp' - being the id of a possible | |
// match - could take us RIGHT outside our buffer if we actually matched something. We aren't | |
// in this particular run, but so it goes. Saner would be to build up an all-zero id vector | |
for (; i+7 < buffers.size(); i+=8) { |
View gist:2b873b7b166cafadfba164e58323b73e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
109 111 117 115:101 0 0 0| 0 0 0 0: 0 0 0 0|109 111 117 115:101 0 0 0| 0 0 0 0: 0 0 0 0| input | |
0 1 2 3: 4 128 0 1| 2 3 4 128: 0 1 2 128| 0 1 2 128:128 128 128 128|128 128 128 128:128 128 128 128| shuf_mask | |
109 111 117 115:101 0 109 111|117 115 101 0:109 111 117 0|109 111 117 0: 0 0 0 0| 0 0 0 0: 0 0 0 0| shuf result | |
109 111 111 115:101 255 109 111|117 115 101 255: 99 97 116 255|100 111 103 255:255 255 255 255|255 255 255 255:255 255 255 255| cmp_mask | |
255 255 0 255:255 0 255 255|255 255 255 0: 0 0 0 0| 0 255 0 0: 0 0 0 0| 0 0 0 0: 0 0 0 0| cmp result | |
11_11_11111______1______________________________________________ input to gpr-smh | |
_____1_____1___1___1____________________________________________ hi | |
1_____1_____1___1_______________________________________________ low | |
__111______11___11______________________________________________ after_add |
View smh good bits
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct SIMD_SMH_PART { | |
m256 shuf_mask; | |
m256 cmp_mask; | |
m256 and_mask; // not yet used | |
m256 sub_mask; // not yet used | |
// Shuffles the bytes of `d` with shuf_mask, compares the shuffled bytes
// against cmp_mask for equality, and returns the comparison outcome packed
// into a 32-bit mask (one bit per byte lane, taken from each lane's top bit).
u32 doit(m256 d) {
    // Step 1: rearrange the input bytes as directed by shuf_mask.
    const auto shuffled = _mm256_shuffle_epi8(d, shuf_mask);
    // Step 2: a lane becomes all-ones where it equals cmp_mask, else zero.
    const auto matched = _mm256_cmpeq_epi8(shuffled, cmp_mask);
    // Step 3: collapse the per-lane results into one bit each.
    return _mm256_movemask_epi8(matched);
}
View sheng-output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ sudo nice --20 taskset -c 1 ./sheng | |
Sheng | |
0/1 1/1 2/1 3/1 4/1 5/1 6/1 7/1 8/1 9/1 | |
10/1 11/1 12/1 13/1 14/1 15/1 16/1 17/1 18/1 19/1 | |
20/1 21/1 22/1 23/1 24/1 25/2 26/3 27/4 28/5 29/5 | |
30/5 31/5 32/5 33/5 34/5 35/5 36/5 37/5 38/5 39/5 | |
40/5 41/5 42/5 43/5 44/5 45/5 46/5 47/5 48/5 49/5 | |
50/5 51/5 52/5 53/5 54/5 55/5 56/5 57/5 58/5 59/5 | |
60/6 61/7 62/8 63/9 64/10 65/1 66/1 67/1 68/1 |
View sheng-gist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct Sheng { | |
typedef m128 State; | |
m128 transitions[256]; | |
State start_state; | |
Sheng(std::vector<std::tuple<u32, u32, u8>> & trans_vec, u8 start_state_, u8 default_state) { | |
// fill all transitions with default state | |
for (u32 i = 0; i < 256; ++i) { | |
transitions[i] = _mm_set1_epi8(default_state); | |
} |
NewerOlder