Skip to content

Instantly share code, notes, and snippets.

@karthick18
Created July 28, 2010 19:42
Show Gist options
  • Save karthick18/495979 to your computer and use it in GitHub Desktop.
Save karthick18/495979 to your computer and use it in GitHub Desktop.
/*
* Don't use strncpy.
* Compile with -m32 if on x86_64.
* Moral of the story: strncpy zero-fills whatever is left of the destination, so for
* large buffers, zero the first byte of the destination and use strncat instead. That is
* at least 2x faster, depending on how many bytes strncpy would have had to zero.
* In short, don't use strncpy :-)
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <assert.h>
/*
* Poor man's profiler: store the CPU time-stamp counter in x, which must be an
* lvalue able to hold 64 bits (e.g. unsigned long long).
* TSC readings can be skewed by cpufreq scaling, scheduling, etc., but they are
* still good enough for approximations.
* The two 32-bit halves are read from EAX and EDX explicitly: the "=A" constraint
* only denotes the EDX:EAX pair on 32-bit x86, and on x86_64 it would silently
* drop the high half of the counter.
*/
#define rdtsc(x) do { \
    unsigned int rdtsc_lo_, rdtsc_hi_; \
    __asm__ __volatile__("rdtsc" : "=a"(rdtsc_lo_), "=d"(rdtsc_hi_)); \
    (x) = ((unsigned long long)rdtsc_hi_ << 32) | rdtsc_lo_; \
} while (0)
/*
* Serialize the instruction stream around rdtsc so that earlier instructions have
* retired before the counter is read. cpuid is used because it is a serializing
* instruction; it is not a cache flush.
* cpuid overwrites EAX, EBX, ECX and EDX, so all four are declared as outputs;
* otherwise the compiler may keep live values (such as a loop counter) in them.
* EAX/ECX are set to 0 so a fixed, always-valid leaf is queried.
* The "memory" clobber also stops the compiler from moving memory accesses across
* the barrier.
*/
#define barrier() do { \
    unsigned int cpuid_eax_ = 0, cpuid_ebx_, cpuid_ecx_ = 0, cpuid_edx_; \
    __asm__ __volatile__("cpuid" \
        : "+a"(cpuid_eax_), "=b"(cpuid_ebx_), "+c"(cpuid_ecx_), "=d"(cpuid_edx_) \
        : : "memory"); \
    (void)cpuid_ebx_; (void)cpuid_edx_; \
} while (0)
/*
* Compile-time selection of the routine under test.
* With -DFAKE_STRNCPY every later use of the name strncpy in this file expands to
* xstrncat and fake is 1; otherwise it expands to xstrncpy and fake stays 0
* (zero-initialised static). main() reads fake only to label its output line.
* NOTE(review): this redefines a standard library name after <string.h> has been
* included; it works for this benchmark but shadows the libc strncpy for the rest
* of the file.
*/
#ifdef FAKE_STRNCPY
#define strncpy xstrncat
static int fake = 1;
#else
#define strncpy xstrncpy
static int fake;
#endif
/*
* From linux kernel arch/x86/lib/string_32.c
*/
#ifndef FAKE_STRNCPY
/*
* strncpy written in 32-bit x86 inline assembly (see the -m32 note at the top of
* the file).
*
* Copies bytes from src to dest until either count bytes have been written or a
* NUL byte has been copied. Once the NUL has been copied, the remaining part of
* the count bytes is filled with zero bytes. If src holds no NUL within its
* first count bytes, dest is left without a terminator.
*
* dest:  destination buffer, at least count bytes long
* src:   NUL-terminated source string
* count: number of bytes to write to dest; the loop tests the sign flag after
*        decrementing, so a count with the top bit set copies nothing
*
* Returns dest.
*/
char *xstrncpy(char *dest, const char *src, size_t count)
{
/* dummy outputs: tell the compiler that ESI, EDI, ECX and EAX are modified */
int d0, d1, d2, d3;
asm volatile("1:\tdecl %2\n\t" /* --count (ECX) */
"js 2f\n\t" /* count exhausted: leave without writing a terminator */
"lodsb\n\t" /* AL = *src++ (ESI) */
"stosb\n\t" /* *dest++ = AL (EDI) */
"testb %%al,%%al\n\t"
"jne 1b\n\t" /* keep copying until the NUL byte has been stored */
"rep\n\t" /* AL is 0 here: store it ECX more times ... */
"stosb\n" /* ... which zero-pads the rest of dest */
"2:"
: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
: "0" (src), "1" (dest), "2" (count) : "memory");
return dest;
}
#else
/*
* strncat written in 32-bit x86 inline assembly (see the -m32 note at the top of
* the file).
*
* Finds the terminating NUL of dest, then appends bytes from src until either
* count bytes have been appended or src's NUL has been copied, and finally stores
* a NUL byte. Unlike strncpy, nothing beyond that terminator is zero-filled.
* Note that the final store also runs after src's own NUL has been copied, so in
* that case two NUL bytes are written.
*
* dest:  NUL-terminated destination string with room for the appended bytes plus
*        the terminator(s)
* src:   NUL-terminated source string
* count: maximum number of bytes to take from src; the loop tests the sign flag
*        after decrementing, so a count with the top bit set appends nothing
*
* Returns dest.
*/
char *xstrncat(char *dest, const char *src, size_t count)
{
/* dummy outputs: tell the compiler that ESI, EDI, EAX and ECX are modified */
int d0, d1, d2, d3;
asm volatile("repne\n\t" /* scan dest for AL (0), at most ECX = 0xffffffff bytes */
"scasb\n\t"
"decl %1\n\t" /* EDI is one past the NUL: step back onto it */
"movl %8,%3\n" /* ECX = count (operand 8 is the "g" input) */
"1:\tdecl %3\n\t" /* --count */
"js 2f\n\t" /* count exhausted: go and terminate */
"lodsb\n\t" /* AL = *src++ (ESI) */
"stosb\n\t" /* *dest++ = AL (EDI) */
"testb %%al,%%al\n\t"
"jne 1b\n" /* keep copying until src's NUL has been stored */
"2:\txorl %2,%2\n\t" /* AL = 0 */
"stosb" /* store the terminating NUL */
: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu), "g" (count)
: "memory");
return dest;
}
#endif
/*
* Benchmark driver.
*
* Usage: prog [samples [byte_range]]
*   samples    - number of timed calls; values below 10 are raised to 10
*   byte_range - size of the copy in bytes (default 256); non-positive values
*                become 8, the value is rounded up to a multiple of 8 and
*                capped at one page
*
* Pins the process to CPU 0, maps and locks one source and one destination
* buffer, fills the first quarter of the source with non-zero bytes, and then
* times `samples` calls of the routine selected by the strncpy macro using the
* time-stamp counter. Prints the minimum, maximum and average cycle counts.
*
* Returns 0 on success, 1 if CPU pinning or buffer mapping fails.
*/
int main(int argc, char **argv)
{
    unsigned long long start = 0, end = 0, t, total = 0, min = ~0ULL, max = 0;
    cpu_set_t set;
    int samples = 10, byte_range = 256, pagesize = getpagesize();
    char *d, *s;
    int i;

    /* Lock the process to CPU zero to avoid large skews in the rdtsc output. */
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    /* Must not live inside assert(): the call would vanish under NDEBUG. */
    if (sched_setaffinity(0, sizeof set, &set) != 0) {
        perror("sched_setaffinity");
        return 1;
    }

    if (argc > 1)
        samples = atoi(argv[1]);
    if (argc > 2)
        byte_range = atoi(argv[2]);
    if (samples < 10)
        samples = 10;

    /* Zero or negative sizes would produce an invalid mmap length. */
    if (byte_range <= 0)
        byte_range = 8;
    /* Cap before rounding so that "+ 7" cannot overflow for huge input. */
    if (byte_range > pagesize)
        byte_range = pagesize;
    byte_range += 7;
    byte_range &= ~7;
    if (byte_range > pagesize)
        byte_range = pagesize;

    d = mmap(NULL, (size_t)byte_range, PROT_READ | PROT_WRITE,
             MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if (d == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    s = mmap(NULL, (size_t)byte_range, PROT_READ | PROT_WRITE,
             MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if (s == MAP_FAILED) {
        perror("mmap");
        munmap(d, (size_t)byte_range);
        return 1;
    }

    /*
     * Lock the pages so page faults do not show up in the timings. Locking is
     * best effort: on failure (e.g. RLIMIT_MEMLOCK) warn and carry on.
     */
    if (mlock(d, (size_t)byte_range) != 0)
        perror("mlock(dest)");
    if (mlock(s, (size_t)byte_range) != 0)
        perror("mlock(src)");

    /*
     * Fill 1/4th of the byte range in src with non-zero bytes; the rest of the
     * fresh anonymous mapping is zero, which terminates the string.
     */
    memset(s, 0xa5, (size_t)(byte_range >> 2));

    for (i = 0; i < samples; ++i) {
        barrier();
        *d = 0; /* empty destination string, so the strncat variant acts as a copy */
        rdtsc(start);
        strncpy(d, s, byte_range);
        rdtsc(end);
        barrier();
        t = end - start;
        total += t;
        if (t < min)
            min = t;
        if (t > max)
            max = t;
    }

    /* min and max are unsigned long long, hence %llu rather than %lld. */
    printf("samples taken [%d], byte range [%d], Min [%s] time [%llu], max [%llu], avg [%.3f]\n",
           samples, byte_range, fake ? "strncat" : "strncpy",
           min, max, total * 1.0 / samples);

    munmap(d, (size_t)byte_range);
    munmap(s, (size_t)byte_range);
    return 0;
}
/*
* Local variables:
* c-file-style: "linux"
* c-basic-offset: 4
* tab-width: 4
* compile-command: "gcc -m32 -Wall -g -o strncpy strncpy.c"
* End:
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment