Skip to content

Instantly share code, notes, and snippets.

@lpereira
Last active January 6, 2017 16:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lpereira/745bf7fda4e29c7aa027de1d51e5c651 to your computer and use it in GitHub Desktop.
Save lpereira/745bf7fda4e29c7aa027de1d51e5c651 to your computer and use it in GitHub Desktop.
% sudo cpupower frequency-set -g performance ~/git/benchmark/build
Setting cpu: 0
Setting cpu: 1
Setting cpu: 2
Setting cpu: 3
% ./test ~/git/simdswitch
Run on (4 X 2746.51 MHz CPU s)
2017-01-06 08:06:24
Benchmark Time CPU Iterations
---------------------------------------------------------------
prebuilt_mask_sse2_epi32 7 ns 7 ns 102876850
prebuilt_mask_sse2_epi8 8 ns 8 ns 88439940
simple_sse2 8 ns 8 ns 82075111
naive 436 ns 436 ns 1586377
bsearch 335 ns 335 ns 2092091
lwan 2 ns 2 ns 355762240
#include <benchmark/benchmark.h>
#include <emmintrin.h>
#include <immintrin.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Header names used as the lookup table for every benchmark below.
 *
 * The entries are listed in ascending strcmp() order, which the bsearch()
 * based lookup relies on, and each one has a distinct 4-byte prefix, which is
 * all the SIMD and switch based lookups compare. The benchmarks expect a
 * lookup to return the position of the name in this array.
 */
static const char* vec[] = {
"Accept",
"Authorization",
"Connection",
"Content-Type",
"Cookie",
"Host",
"If-Modified-Since",
"Range",
};
/*
 * Packed 4-byte prefixes of vec[], four per register. Filled in by
 * build_select_string_mask() from main() before the benchmarks run.
 */
static __m128i mask[2];
/*
 * Collapse a vector of four 32-bit lanes into a 4-bit integer mask: bit N of
 * the result is the top (sign) bit of lane N. After a lane-wise compare this
 * yields one bit per matching lane.
 */
static inline int _mm_movemask_epi32(__m128i a)
{
    /* Pure reinterpretation of the bits; no int -> float conversion happens. */
    const __m128 as_floats = _mm_castsi128_ps(a);

    return _mm_movemask_ps(as_floats);
}
/*
 * Load the first sizeof(int) bytes of `s` as an int in native byte order.
 *
 * memcpy() is used instead of a pointer cast so the load has no alignment or
 * aliasing requirements. The caller must guarantee that at least sizeof(int)
 * bytes are readable at `s` (the terminating NUL counts as one of them).
 */
static inline int string_as_int(const char* s)
{
    int packed;

    memcpy(&packed, s, sizeof packed);
    return packed;
}
/*
 * Pack the 4-byte prefixes of vec[] into SSE registers, four strings per
 * register: the prefix of vec[base + k] ends up in 32-bit lane k of
 * mask[base / 4].
 *
 * vec_len must be a multiple of 4 because every iteration reads four
 * consecutive entries, and mask[] must have room for vec_len / 4 registers.
 */
static void build_select_string_mask(const char* vec[], int vec_len, __m128i mask[])
{
    for (int base = 0; base < vec_len; base += 4) {
        const int lane0 = string_as_int(vec[base + 0]);
        const int lane1 = string_as_int(vec[base + 1]);
        const int lane2 = string_as_int(vec[base + 2]);
        const int lane3 = string_as_int(vec[base + 3]);

        /* _mm_set_epi32 takes its arguments from the highest lane down. */
        mask[base / 4] = _mm_set_epi32(lane3, lane2, lane1, lane0);
    }
}
/*
 * Look up `s` by its 4-byte prefix in a table of pre-packed prefixes.
 *
 * mask[]  registers holding four packed prefixes each
 * n_mask  number of registers in mask[]
 * s       string to look up; at least 4 bytes must be readable
 *
 * Returns the index of the first matching prefix (register index * 4 + lane),
 * or -1 when no lane matches.
 */
static int select_string_prebuilt_mask_epi32(const __m128i mask[], int n_mask, const char* s)
{
    /* Broadcast the needle's prefix so all four lanes compare at once. */
    const __m128i needle = _mm_set1_epi32(string_as_int(s));

    for (int reg = 0; reg < n_mask; reg++) {
        const __m128i equal_lanes = _mm_cmpeq_epi32(needle, mask[reg]);
        const int hits = _mm_movemask_epi32(equal_lanes); /* one bit per lane */

        if (hits != 0) {
            /* ffs is 1-based, so subtract one to get the lowest matching lane. */
            const int lane = __builtin_ffs(hits) - 1;
            return reg * 4 + lane;
        }
    }

    return -1;
}
/*
 * Same lookup as the epi32 variant, but the compare result is reduced with
 * the byte-wise movemask instead of the per-lane one.
 *
 * mask[]  registers holding four packed prefixes each
 * n_mask  number of registers in mask[]
 * s       string to look up; at least 4 bytes must be readable
 *
 * Returns the index of the first matching prefix, or -1 when nothing matches.
 */
static int select_string_prebuilt_mask_epi8(const __m128i mask[], int n_mask, const char* s)
{
    const __m128i needle = _mm_set1_epi32(string_as_int(s));

    for (int reg = 0; reg < n_mask; reg++) {
        const __m128i equal_lanes = _mm_cmpeq_epi32(needle, mask[reg]);
        /* One bit per byte: a matching 32-bit lane sets four adjacent bits. */
        const int byte_hits = _mm_movemask_epi8(equal_lanes);

        if (byte_hits != 0) {
            /*
             * The lowest set bit of lane k is bit 4*k, which ffs reports as
             * 4*k + 1; integer division by 4 recovers k.
             */
            const int lane = __builtin_ffs(byte_hits) / 4;
            return reg * 4 + lane;
        }
    }

    return -1;
}
/*
 * SSE2 lookup that packs the table prefixes on every call instead of using a
 * prebuilt mask.
 *
 * vec[]  table of strings; each must have at least 4 readable bytes
 * n_vec  number of entries; must be a multiple of 4 because each iteration
 *        reads four consecutive entries
 * s      string to look up; at least 4 bytes must be readable
 *
 * Returns the index of the first entry whose 4-byte prefix equals that of
 * `s`, or -1 when there is none.
 */
static int select_string(const char* vec[], int n_vec, const char* s)
{
    const __m128i needle = _mm_set1_epi32(string_as_int(s));

    for (int base = 0; base < n_vec; base += 4) {
        /* Lane k holds the prefix of vec[base + k]. */
        const __m128i group = _mm_set_epi32(
            string_as_int(vec[base + 3]),
            string_as_int(vec[base + 2]),
            string_as_int(vec[base + 1]),
            string_as_int(vec[base]));
        const int hits = _mm_movemask_epi32(_mm_cmpeq_epi32(needle, group));

        if (hits != 0) {
            const int lane = __builtin_ffs(hits) - 1; /* ffs is 1-based */
            return base + lane;
        }
    }

    return -1;
}
/* Build a 256-bit vector from two 128-bit halves (`lo` in the low half). */
#define _mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 0x1)
/*
 * AVX2 lookup: compares the 4-byte prefix of `s` against eight table entries
 * per iteration.
 *
 * vec[]  table of strings; each must have at least 4 readable bytes
 * n_vec  number of entries; must be a multiple of 8 because each iteration
 *        reads eight consecutive entries
 * s      string to look up; at least 4 bytes must be readable
 *
 * Returns the index of the first entry whose 4-byte prefix equals that of
 * `s`, or -1 when there is none. Must only be called on CPUs with AVX2.
 */
static int select_string_avx2(const char* vec[], int n_vec, const char* s)
{
    const __m128i look_mask_128 = _mm_set1_epi32(string_as_int(s));
    const __m256i look_mask = _mm256_set_m128i(look_mask_128, look_mask_128);

    for (int i = 0; i < n_vec; i += 8) {
        /*
         * _mm256_setr_epi32 takes its arguments from the lowest lane up, so
         * passing the entries in ascending order puts vec[i + k] in lane k.
         */
        const __m256i curr_mask = _mm256_setr_epi32(
            string_as_int(vec[i]),     string_as_int(vec[i + 1]),
            string_as_int(vec[i + 2]), string_as_int(vec[i + 3]),
            string_as_int(vec[i + 4]), string_as_int(vec[i + 5]),
            string_as_int(vec[i + 6]), string_as_int(vec[i + 7]));
        /* One bit per byte: a matching 32-bit lane sets four adjacent bits. */
        const int cmp_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi32(look_mask, curr_mask));

        if (cmp_mask) {
            /*
             * The lowest set bit of lane k is reported by ffs as 4*k + 1, so
             * dividing by 4 yields k. `i` is already the element index of
             * lane 0, so the lane is simply added to it.
             */
            return i + __builtin_ffs(cmp_mask) / 4;
        }
    }

    return -1;
}
/*
 * Baseline lookup: linear scan comparing at most the first 4 characters of
 * each entry with strncmp().
 *
 * Returns the index of the first entry whose first 4 characters match those
 * of `s`, or -1 when no entry matches.
 */
static int select_string_naive(const char* vec[], int n_vec, const char* s)
{
    int pos = 0;

    while (pos < n_vec) {
        if (strncmp(vec[pos], s, 4) == 0)
            return pos;
        pos++;
    }

    return -1;
}
/*
 * bsearch() comparator for a table of C strings.
 *
 * a  the search key, passed through by bsearch() as a plain string
 * b  pointer to a table element, i.e. a pointer to a string pointer
 *
 * Returns the strcmp() ordering of the key against the element.
 */
static inline int compar(const void* a, const void* b)
{
    const char *key = (const char *)a;
    const char *element = *(const char *const *)b;

    return strcmp(key, element);
}
/*
 * Lookup using the C library's binary search over the whole strings.
 *
 * vec[] must be sorted in ascending strcmp() order. Unlike the prefix based
 * lookups, this one compares complete strings.
 *
 * Returns the index of the entry equal to `s`, or -1 when it is not present.
 */
static int select_string_bsearch(const char* vec[], int n_vec, const char* s)
{
    const char** hit = (const char**)bsearch(s, vec, n_vec, sizeof(vec[0]), compar);

    /* The distance from the start of the table is the entry's index. */
    return hit ? (int)(hit - vec) : -1;
}
/*
 * Lookup in the style used by lwan: load the first 4 bytes of `s` as an
 * integer and switch over precomputed 4-character constants.
 *
 * The constants place the first character in the lowest byte, so they match
 * string_as_int() on little-endian machines.
 *
 * Returns the header's position in the table (0..7), or -1 when the prefix is
 * not one of the known ones.
 */
static int select_string_lwan(const char* s)
{
#define PREFIX4(c0, c1, c2, c3) ((int)((c0) | (c1) << 8 | (c2) << 16 | (c3) << 24))
    enum {
        KEY_ACCEPT            = PREFIX4('A', 'c', 'c', 'e'),
        KEY_AUTHORIZATION     = PREFIX4('A', 'u', 't', 'h'),
        KEY_CONNECTION        = PREFIX4('C', 'o', 'n', 'n'),
        KEY_CONTENT_TYPE      = PREFIX4('C', 'o', 'n', 't'),
        KEY_COOKIE            = PREFIX4('C', 'o', 'o', 'k'),
        KEY_HOST              = PREFIX4('H', 'o', 's', 't'),
        KEY_IF_MODIFIED_SINCE = PREFIX4('I', 'f', '-', 'M'),
        KEY_RANGE             = PREFIX4('R', 'a', 'n', 'g'),
    };
#undef PREFIX4

    const int key = string_as_int(s);

    switch (key) {
    case KEY_ACCEPT:
        return 0;
    case KEY_AUTHORIZATION:
        return 1;
    case KEY_CONNECTION:
        return 2;
    case KEY_CONTENT_TYPE:
        return 3;
    case KEY_COOKIE:
        return 4;
    case KEY_HOST:
        return 5;
    case KEY_IF_MODIFIED_SINCE:
        return 6;
    case KEY_RANGE:
        return 7;
    default:
        break;
    }

    return -1;
}
/*
 * Benchmark body for select_string_prebuilt_mask_epi32().
 *
 * Each iteration performs six lookups against the prebuilt mask and checks
 * the returned index. The checks short-circuit in order, so the first wrong
 * answer marks the run as failed and ends the loop.
 */
static void BM_SSE2_PrebuiltMask1(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string_prebuilt_mask_epi32(mask, 2, "Content-Type") == 3
            && select_string_prebuilt_mask_epi32(mask, 2, "Accept") == 0
            && select_string_prebuilt_mask_epi32(mask, 2, "Foo-Bar") == -1
            && select_string_prebuilt_mask_epi32(mask, 2, "Range") == 7
            && select_string_prebuilt_mask_epi32(mask, 2, "Connection") == 2
            && select_string_prebuilt_mask_epi32(mask, 2, "Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Benchmark body for select_string_prebuilt_mask_epi8().
 *
 * Each iteration performs six lookups against the prebuilt mask and checks
 * the returned index. The checks short-circuit in order, so the first wrong
 * answer marks the run as failed and ends the loop.
 */
static void BM_SSE2_PrebuiltMask2(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string_prebuilt_mask_epi8(mask, 2, "Content-Type") == 3
            && select_string_prebuilt_mask_epi8(mask, 2, "Accept") == 0
            && select_string_prebuilt_mask_epi8(mask, 2, "Foo-Bar") == -1
            && select_string_prebuilt_mask_epi8(mask, 2, "Range") == 7
            && select_string_prebuilt_mask_epi8(mask, 2, "Connection") == 2
            && select_string_prebuilt_mask_epi8(mask, 2, "Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Benchmark body for select_string(), the SSE2 lookup that packs the table on
 * every call.
 *
 * Each iteration performs six lookups and checks the returned index. The
 * checks short-circuit in order, so the first wrong answer marks the run as
 * failed and ends the loop.
 */
static void BM_SSE2_Simple(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string(vec, 8, "Content-Type") == 3
            && select_string(vec, 8, "Accept") == 0
            && select_string(vec, 8, "Foo-Bar") == -1
            && select_string(vec, 8, "Range") == 7
            && select_string(vec, 8, "Connection") == 2
            && select_string(vec, 8, "Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Benchmark body for select_string_avx2().
 *
 * Each iteration performs six lookups and checks the returned index. The
 * checks short-circuit in order, so the first wrong answer marks the run as
 * failed and ends the loop.
 */
static void BM_AVX2_Simple(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string_avx2(vec, 8, "Content-Type") == 3
            && select_string_avx2(vec, 8, "Accept") == 0
            && select_string_avx2(vec, 8, "Foo-Bar") == -1
            && select_string_avx2(vec, 8, "Range") == 7
            && select_string_avx2(vec, 8, "Connection") == 2
            && select_string_avx2(vec, 8, "Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Benchmark body for select_string_naive(), the strncmp() linear scan.
 *
 * Each iteration performs six lookups and checks the returned index. The
 * checks short-circuit in order, so the first wrong answer marks the run as
 * failed and ends the loop.
 */
static void BM_Naive(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string_naive(vec, 8, "Content-Type") == 3
            && select_string_naive(vec, 8, "Accept") == 0
            && select_string_naive(vec, 8, "Foo-Bar") == -1
            && select_string_naive(vec, 8, "Range") == 7
            && select_string_naive(vec, 8, "Connection") == 2
            && select_string_naive(vec, 8, "Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Benchmark body for select_string_lwan(), the switch over 4-byte prefixes.
 *
 * Each iteration performs six lookups and checks the returned index. The
 * checks short-circuit in order, so the first wrong answer marks the run as
 * failed and ends the loop.
 */
static void BM_Lwan(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string_lwan("Content-Type") == 3
            && select_string_lwan("Accept") == 0
            && select_string_lwan("Foo-Bar") == -1
            && select_string_lwan("Range") == 7
            && select_string_lwan("Connection") == 2
            && select_string_lwan("Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Benchmark body for select_string_bsearch(), the C library binary search.
 *
 * Each iteration performs six lookups and checks the returned index. The
 * checks short-circuit in order, so the first wrong answer marks the run as
 * failed and ends the loop.
 */
static void BM_Bsearch(benchmark::State& state)
{
    while (state.KeepRunning()) {
        const bool all_ok =
            select_string_bsearch(vec, 8, "Content-Type") == 3
            && select_string_bsearch(vec, 8, "Accept") == 0
            && select_string_bsearch(vec, 8, "Foo-Bar") == -1
            && select_string_bsearch(vec, 8, "Range") == 7
            && select_string_bsearch(vec, 8, "Connection") == 2
            && select_string_bsearch(vec, 8, "Host") == 5;

        if (!all_ok) {
            state.SkipWithError("search returned wrong value");
            break;
        }
    }
}
/*
 * Register the benchmarks that the running CPU can execute, then hand
 * control to the benchmark library.
 *
 * The SIMD variants are only registered when the CPU reports the matching
 * instruction set; the scalar variants are always registered.
 */
int main(int argc, char** argv)
{
    __builtin_cpu_init();

    if (__builtin_cpu_supports("sse2")) {
        /*
         * The prebuilt mask is only used by the SSE2 benchmarks and is built
         * with SSE2 intrinsics, so it is set up behind the same CPU check.
         */
        build_select_string_mask(vec, 8, mask);

        benchmark::RegisterBenchmark("prebuilt_mask_sse2_epi32", BM_SSE2_PrebuiltMask1);
        benchmark::RegisterBenchmark("prebuilt_mask_sse2_epi8", BM_SSE2_PrebuiltMask2);
        benchmark::RegisterBenchmark("simple_sse2", BM_SSE2_Simple);
    }

    if (__builtin_cpu_supports("avx2")) {
        benchmark::RegisterBenchmark("simple_avx", BM_AVX2_Simple);
    }

    benchmark::RegisterBenchmark("naive", BM_Naive);
    benchmark::RegisterBenchmark("bsearch", BM_Bsearch);
    benchmark::RegisterBenchmark("lwan", BM_Lwan);

    benchmark::Initialize(&argc, argv);
    benchmark::RunSpecifiedBenchmarks();

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment