Skip to content

Instantly share code, notes, and snippets.

@gaoxyt
Last active October 19, 2023 08:13
Show Gist options
  • Save gaoxyt/4506c10fc06b3501445e32c4257113e9 to your computer and use it in GitHub Desktop.
Save gaoxyt/4506c10fc06b3501445e32c4257113e9 to your computer and use it in GitHub Desktop.
PostgreSQL optimized CRC32C benchmark
// benchmark code
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/time.h>
#include <memory.h>
#include "c.h"
#include "port/pg_crc32c.h"
/*
 * Return the current time of day, as reported by gettimeofday(), expressed
 * in microseconds.
 *
 * The seconds field is widened to 64 bits before being scaled so that the
 * multiplication cannot overflow on platforms where time_t is narrower
 * than 64 bits.
 */
uint64_t
GetTickCount(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return (uint64_t) tv.tv_sec * 1000000u + (uint64_t) tv.tv_usec;
}
/*
 * Benchmark driver.
 *
 * For each buffer size in test_size[], fill a heap buffer with pseudo-random
 * bytes (fixed seed, so every run sees the same data), then time kLoop
 * successive COMP_CRC32C() passes over that buffer and print the total and
 * per-pass elapsed time in microseconds.
 *
 * Returns 0 on success, EXIT_FAILURE if the test buffer cannot be allocated.
 */
int
main(void)
{
    enum { CASE_CNT = 2 };
    static const uint32_t kLoop = 1024;
    const uint32_t test_size[CASE_CNT] = {512, 1024 * 4};

    for (int case_cnt = 0; case_cnt < CASE_CNT; case_cnt++)
    {
        const uint32_t size = test_size[case_cnt];
        uint8_t    *buf = malloc(size);

        if (buf == NULL)
        {
            fprintf(stderr, "failed to allocate %u bytes\n", (unsigned) size);
            return EXIT_FAILURE;
        }

        /* Re-seed for every case so the input data is reproducible. */
        srand(0);
        for (uint32_t i = 0; i < size; i++)
        {
            buf[i] = (uint8_t) (rand() % 256u);
        }

        uint32_t    crc = 0;
        uint64_t    start = GetTickCount();

        /* The timed region covers init, kLoop accumulation passes, and finalization. */
        INIT_CRC32C(crc);
        for (uint32_t i = 0; i < kLoop; i++)
        {
            COMP_CRC32C(crc, buf, size);
        }
        FIN_CRC32C(crc);

        uint64_t    stop = GetTickCount();
        uint64_t    elapsed = stop - start;

        /*
         * Cast to types that match the conversion specifiers exactly; passing
         * uint32_t/uint64_t to %d/%ld is not portable.
         */
        printf("data size is %u bytes, and compute crc cost %llu us totally, %f us per loop\n",
               (unsigned) size,
               (unsigned long long) elapsed,
               (double) elapsed / kLoop);
        free(buf);
    }
    return 0;
}
// compile
// First build PostgreSQL twice, once with each CRC32C implementation (with and without the Arm vmull_p64 instruction).
// To keep the build simple, comment out the elog-related code in pg_crc32c_armv8_choose.c so that the benchmark links correctly.
$ gcc -I ../postgres/_install/include -I ../postgres/_install/include/server main.c -L ../postgres/build/src/port -l pgport_srv -O2 -o main
// result
// this test was run on Neoverse-N1
$ ./main.no_vmull
data size is 512 bytes, and compute crc cost 139 us totally, 0.135742 us per loop
data size is 4096 bytes, and compute crc cost 1061 us totally, 1.036133 us per loop
$ ./main.use_vmull
data size is 512 bytes, and compute crc cost 101 us totally, 0.098633 us per loop
data size is 4096 bytes, and compute crc cost 540 us totally, 0.527344 us per loop
We can see that, for the large data size (4096 bytes), computing CRC32C without vmull_p64 costs about twice as much as computing it with vmull_p64, while for the small data size (512 bytes) the two costs are much closer.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment