Skip to content

Instantly share code, notes, and snippets.

@depp
Created February 12, 2015 22:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save depp/00425610e265c93d5c2c to your computer and use it in GitHub Desktop.
Save depp/00425610e265c93d5c2c to your computer and use it in GitHub Desktop.
Copy memory, reversing bits

Benchmark

After compiling, run the benchmark with 100M integers and 10 iterations:

./run 100000000 10

On my system, which reports its CPU as "Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz" and compiles with Apple LLVM 6.0, I get the following numbers:

$ ./run 100000000 10
memcpy: 8434.601265 MB/s
copy_rev: 2470.059788 MB/s
copy_tbl: 1512.520074 MB/s
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
/*
 * Copy n words from src to dest in reverse element order, reversing the
 * bit order inside each word with shift-and-mask steps.
 */
void
copy_rev(unsigned int *restrict dest,
unsigned int const *restrict src,
unsigned int n);
/* Plain forward copy of n words from src to dest (a memcpy wrapper). */
void
copy_fwd(unsigned int *restrict dest,
unsigned int const *restrict src,
unsigned int n);
/*
 * Copy the bytes of n words from src to dest in reverse byte order,
 * bit-reversing each byte through a 256-entry lookup table.
 */
void
copy_tbl(unsigned int *restrict dest,
unsigned int const *restrict src,
unsigned int n);
/*
 * Common signature of the copy routines above (destination, source,
 * element count), so the benchmark can take any of them as an argument.
 */
typedef void
(*copy_t)(unsigned *restrict,
unsigned const *restrict,
unsigned);
/*
 * Benchmark a single copy routine.
 *
 * Allocates a source and a destination buffer of `size` words, fills the
 * source with values built from two rand() calls per word, then times
 * `count` back-to-back invocations of `func` using gettimeofday().
 *
 * Returns the measured throughput in bytes per second.  Aborts the
 * process if either buffer cannot be allocated.
 */
double
run(copy_t func, unsigned size, unsigned count)
{
    const size_t buffer_bytes = size * sizeof(unsigned);

    unsigned *source = malloc(buffer_bytes);
    if (source == NULL)
        abort();
    unsigned *target = malloc(buffer_bytes);
    if (target == NULL)
        abort();

    /* Combine two rand() results: one for the upper bits, one for the low 16. */
    unsigned idx = 0;
    while (idx < size) {
        source[idx] = ((unsigned) rand() << 16) | ((unsigned) rand() & 0xffff);
        idx++;
    }

    /* One untimed call before measuring (presumably a warm-up pass). */
    func(target, source, size);

    struct timeval begin, end;
    gettimeofday(&begin, NULL);
    for (unsigned remaining = count; remaining > 0; remaining--)
        func(target, source, size);
    gettimeofday(&end, NULL);

    /* Elapsed wall-clock time: whole seconds plus the microsecond remainder. */
    double sec = (end.tv_sec - begin.tv_sec) + (end.tv_usec - begin.tv_usec) * 1e-6;

    free(source);
    free(target);

    return ((double) size * count * sizeof(unsigned)) / sec;
}
/*
 * Parse a command-line argument as a strictly positive count that fits in
 * an unsigned int.  Base 0 is used, so decimal, octal (leading 0) and hex
 * (leading 0x) are all accepted.
 *
 * Returns 1 and stores the value in *out on success; returns 0 for empty,
 * malformed, non-positive, or out-of-range input.
 */
static int
parse_count(const char *text, unsigned *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 0);
    /* Reject "no digits", trailing garbage, and overflow of long. */
    if (end == text || *end != '\0' || errno == ERANGE)
        return 0;
    /* Reject zero/negative input and anything that would be truncated. */
    if (value <= 0 || (unsigned long) value > UINT_MAX)
        return 0;
    *out = (unsigned) value;
    return 1;
}

/*
 * Benchmark driver.
 *
 * Usage: <program> <num-integers> <iterations>
 *
 * Runs each copy routine over a buffer of <num-integers> words for
 * <iterations> timed passes and prints the throughput in MB/s
 * (run() returns bytes per second, scaled here by 1e-6).
 * Exits with EXIT_FAILURE and a usage message on invalid arguments.
 */
int
main(int argc, char *argv[])
{
    unsigned size, count;

    if (argc != 3
        || !parse_count(argv[1], &size)
        || !parse_count(argv[2], &count)) {
        fprintf(stderr, "usage: %s <num-integers> <iterations>\n",
                argc > 0 ? argv[0] : "run");
        return EXIT_FAILURE;
    }

    printf("memcpy: %f MB/s\n", run(copy_fwd, size, count) * 1e-6);
    printf("copy_rev: %f MB/s\n", run(copy_rev, size, count) * 1e-6);
    printf("copy_tbl: %f MB/s\n", run(copy_tbl, size, count) * 1e-6);
    return 0;
}
# Link the two object files into the `run` executable.
# NOTE(review): make requires each recipe line to begin with a tab; the
# leading whitespace appears to have been lost here - confirm before use.
run: test.o main.o
cc -o $@ $^
# test.c is compiled at -O3.
test.o: test.c
cc -Wall -Wextra -O3 -c $< -o $@
# main.c is compiled at -O2.
main.o: main.c
cc -Wall -Wextra -O2 -c $< -o $@
#include <string.h>
/*
 * Copy n words from src to dest, reversing both the order of the words
 * and the order of the bits inside each word, so that dest holds the
 * complete bit-reversal of the source buffer.
 *
 * The bit reversal swaps progressively smaller groups: 16-bit halves,
 * then bytes, nibbles, bit pairs, and finally single bits.  The masks
 * assume a 32-bit unsigned int.
 *
 * dest and src must not overlap (both are restrict-qualified).
 */
void
copy_rev(unsigned int *restrict dest,
         unsigned int const *restrict src,
         unsigned int n)
{
    for (unsigned int i = 0; i < n; ++i) {
        unsigned int x = src[i];

        x = (x >> 16) | (x << 16);
        x = ((x >> 8) & 0x00ff00ffU) | ((x & 0x00ff00ffU) << 8);
        x = ((x >> 4) & 0x0f0f0f0fU) | ((x & 0x0f0f0f0fU) << 4);
        x = ((x >> 2) & 0x33333333U) | ((x & 0x33333333U) << 2);
        /* Both masks are 8 hex digits with a U suffix so the step stays in unsigned int. */
        x = ((x >> 1) & 0x55555555U) | ((x & 0x55555555U) << 1);

        /* Word i of the source lands at the mirrored position in dest. */
        dest[n - 1 - i] = x;
    }
}
/*
 * Straight forward copy of n words from src to dest, delegating to
 * memcpy().  dest and src must not overlap.
 */
void
copy_fwd(unsigned int *restrict dest,
         unsigned int const *restrict src,
         unsigned int n)
{
    size_t const total_bytes = sizeof(unsigned int) * n;

    (void) memcpy(dest, src, total_bytes);
}
/*
 * Bit-reversal lookup table: TABLE[b] is the byte b with its eight bits
 * in reverse order (e.g. TABLE[1] == 128, TABLE[128] == 1).
 */
static const unsigned char TABLE[256] = {
    0, 128, 64, 192, 32, 160, 96, 224,
    16, 144, 80, 208, 48, 176, 112, 240,
    8, 136, 72, 200, 40, 168, 104, 232,
    24, 152, 88, 216, 56, 184, 120, 248,
    4, 132, 68, 196, 36, 164, 100, 228,
    20, 148, 84, 212, 52, 180, 116, 244,
    12, 140, 76, 204, 44, 172, 108, 236,
    28, 156, 92, 220, 60, 188, 124, 252,
    2, 130, 66, 194, 34, 162, 98, 226,
    18, 146, 82, 210, 50, 178, 114, 242,
    10, 138, 74, 202, 42, 170, 106, 234,
    26, 154, 90, 218, 58, 186, 122, 250,
    6, 134, 70, 198, 38, 166, 102, 230,
    22, 150, 86, 214, 54, 182, 118, 246,
    14, 142, 78, 206, 46, 174, 110, 238,
    30, 158, 94, 222, 62, 190, 126, 254,
    1, 129, 65, 193, 33, 161, 97, 225,
    17, 145, 81, 209, 49, 177, 113, 241,
    9, 137, 73, 201, 41, 169, 105, 233,
    25, 153, 89, 217, 57, 185, 121, 249,
    5, 133, 69, 197, 37, 165, 101, 229,
    21, 149, 85, 213, 53, 181, 117, 245,
    13, 141, 77, 205, 45, 173, 109, 237,
    29, 157, 93, 221, 61, 189, 125, 253,
    3, 131, 67, 195, 35, 163, 99, 227,
    19, 147, 83, 211, 51, 179, 115, 243,
    11, 139, 75, 203, 43, 171, 107, 235,
    27, 155, 91, 219, 59, 187, 123, 251,
    7, 135, 71, 199, 39, 167, 103, 231,
    23, 151, 87, 215, 55, 183, 119, 247,
    15, 143, 79, 207, 47, 175, 111, 239,
    31, 159, 95, 223, 63, 191, 127, 255
};

/*
 * Copy n words from src to dest as a byte stream: the bytes are written
 * in reverse order and each byte is bit-reversed through TABLE, which
 * reverses the bit order of the whole buffer.
 *
 * dest and src must not overlap (both are restrict-qualified).
 */
void
copy_tbl(unsigned int *restrict dest,
         unsigned int const *restrict src,
         unsigned int n)
{
    unsigned char *op = (unsigned char *) dest;
    unsigned char const *ip = (unsigned char const *) src;
    /*
     * Compute the byte count in size_t: an unsigned int product would
     * wrap for large n and leave most of the buffer uncopied.  sizeof
     * replaces the hard-coded word size of 4.
     */
    size_t const nbytes = (size_t) n * sizeof *src;

    for (size_t i = 0; i < nbytes; i++) {
        /* Byte i of the source lands at the mirrored position in dest. */
        op[nbytes - 1 - i] = TABLE[ip[i]];
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment