Skip to content

Instantly share code, notes, and snippets.

@karthick18
Created June 17, 2011 23:08
Show Gist options
  • Save karthick18/1032553 to your computer and use it in GitHub Desktop.
Save karthick18/1032553 to your computer and use it in GitHub Desktop.
Just an example to remind that its futile to free memory in the child to avoid taking a break COW perf. hit. Check the header comments for more details.
/*
* Just an example to remind that it is futile to free memory in the child
* that was allocated by the parent: doing so only incurs a break-COW perf. hit.
* It makes sense only to free large chunk sizes in the child. Smaller chunk sizes
* aren't really trimmed by malloc and only end up causing perf. hits from
* break-COW (copy-on-write) pages when they are freed in the child.
* A break COW happens when free results in the malloc library touching the freed
* chunk of memory, which triggers a write-protection page fault for the child; the
* kernel then unmaps the shared page table entry and maps a writable copy of the
* page into the child.
* The net effect on RSS (resident set size) is the same as in the parent, but
* with an additional page-fault overhead caused by freeing the chunks.
* Hence there is no point in freeing the memory, assuming we aren't dealing with
* large chunk sizes (the common scenario).
* Chunk sizes preferably > 64k/128k are unmapped or trimmed by malloc,
* which ends up reducing the RSS of the child on free; that gives a slight benefit
* over not freeing the chunk in the child because of the break-COW overhead,
* as shown by the example run below with:
* ./brk_cow -s 1m
*
* Special thanks to Gopal.V the great, a.k.a. (@t3rmin4t0r), for pointing out
* that it is futile to issue free in the child.
*
* Compile with: gcc -o brk_cow brk_cow.c -g -Wall
* Example usage for testing: ./brk_cow -h
*/
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <getopt.h>
#include <malloc.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>
/* Chunk size used when -s is not given: 32 << 10 = 32768 bytes. */
#define CHUNK_SIZE_DEFAULT (32<<10)
/* Aliases for the page_size / page_shift globals declared below. */
#define __PAGE_SIZE (page_size)
#define __PAGE_SHIFT (page_shift)
/* Number of child processes used when -c is not given. */
#define NUM_CHILDS_DEFAULT (8)
/* Number of chunks allocated by the parent when -a is not given. */
#define NUM_ALLOCS_DEFAULT (1024)
/* Table of chunk pointers; allocated and filled in test_malloc(), freed per-range in free_chunk(). */
static void **mem_pool;
/*
 * page_size: value of sysconf(_SC_PAGESIZE).
 * page_shift: smallest shift such that (1 << shift) >= page_size.
 * Both are set by get_page_shift().
 */
static int page_size, page_shift;
/*
 * Run-time configuration filled in by main() from the command line;
 * zero-valued allocs/chunk_size/childs are replaced by the *_DEFAULT values there.
 */
struct memconfig
{
int allocs; /* number of chunks the parent allocates (-a) */
int chunk_size; /* size in bytes of each chunk (-s) */
int childs; /* number of child processes to fork (-c) */
int verbose; /* non-zero: free_chunk() also calls malloc_stats() (-v) */
int dont_free; /* non-zero: children skip the free() calls (-d) */
};
/* The single, file-wide configuration instance (zero-initialized). */
static struct memconfig memconfig;
#ifdef USE_MMAP
/*
 * Optional allocator override (build with -DUSE_MMAP): every malloc/calloc/free
 * in the rest of this file is redirected to anonymous private mmap()/munmap()
 * so that each chunk is backed by its own mapping.
 */
#undef malloc
#undef calloc
#undef free
#define malloc xmalloc
#define calloc xcalloc
#define free xfree

/* Map `len` bytes of zero-filled, private anonymous memory; NULL on failure. */
static void *xmmap_anon(size_t len)
{
    void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

    return addr == MAP_FAILED ? NULL : addr;
}

/*
 * malloc() replacement.
 * Returns a fresh mapping of `size` bytes, or NULL when size is not positive
 * or the mapping fails.
 */
static void *xmalloc(int size)
{
    if (size <= 0)
        return NULL;
    return xmmap_anon((size_t)size);
}

/*
 * calloc() replacement (anonymous mappings are already zero-filled).
 * Returns NULL when either argument is not positive, when num * size would
 * overflow, or when the mapping fails.
 */
static void *xcalloc(int num, int size)
{
    if (num <= 0 || size <= 0)
        return NULL;
    /* Do the multiplication in size_t and reject it if it would wrap. */
    if ((size_t)num > (size_t)-1 / (size_t)size)
        return NULL;
    return xmmap_anon((size_t)num * (size_t)size);
}

/*
 * free() replacement.
 * Always unmaps memconfig.chunk_size bytes, so it is only correct for
 * pointers obtained for a chunk of exactly that size.
 */
static void xfree(void *addr)
{
    if (addr != NULL)
        munmap(addr, memconfig.chunk_size);
}
#endif
#define GET_PAGE_FAULTS (1)
#define GET_RSS (2)

/* 1-based field numbers within /proc/<pid>/stat, as listed in proc(5). */
#define STAT_FIELD_STATE (3)   /* first field after the "(comm)" field */
#define STAT_FIELD_MINFLT (10) /* minor faults */
#define STAT_FIELD_RSS (24)    /* resident set size, in pages */

/*
 * Locate a field in the part of a stat line that follows the closing ')' of
 * the comm field.
 *
 * p     - text starting right after the ')' (so the next token is field 3).
 * field - 1-based field number to find, >= STAT_FIELD_STATE.
 *
 * Returns a pointer to the first character of that field, or NULL when the
 * line ends before the field is reached.
 */
static const char *stat_field(const char *p, int field)
{
    int cur = STAT_FIELD_STATE;

    for (;;) {
        while (*p == ' ')
            ++p;
        if (*p == '\0' || *p == '\n')
            return NULL;
        if (cur == field)
            return p;
        while (*p != '\0' && *p != ' ' && *p != '\n')
            ++p;
        ++cur;
    }
}

/*
 * Read one counter for process `pid` from /proc/<pid>/stat.
 *
 * flags - GET_PAGE_FAULTS: minor fault count (field 10).
 *         GET_RSS:         resident set size in pages (field 24).
 *
 * Returns the requested value, or 0 when flags is unknown, the file cannot
 * be opened/read, or the line cannot be parsed.
 */
static long int get_pid_stat(int pid, int flags)
{
    /*
     * Raw open/read instead of glibc file I/O, and static buffers instead of
     * stack buffers, to keep repeated calls from adding page faults of their
     * own to the numbers being measured.
     */
    static char path[80], data[512];
    const char *p;
    char *end = NULL;
    ssize_t bytes;
    long int value;
    int fd, field;

    switch (flags) {
    case GET_PAGE_FAULTS:
        field = STAT_FIELD_MINFLT;
        break;
    case GET_RSS:
        field = STAT_FIELD_RSS;
        break;
    default:
        return 0;
    }

    snprintf(path, sizeof(path), "/proc/%d/stat", pid);
    fd = open(path, O_RDONLY);
    if (fd < 0)
        return 0;
    /* Leave room for the terminator: a full-buffer read must not overflow. */
    bytes = read(fd, data, sizeof(data) - 1);
    close(fd);
    if (bytes <= 0)
        return 0; /* never parse stale data left over from a previous call */
    data[bytes] = '\0';

    /*
     * The comm field is "(name)" and the name may itself contain spaces or
     * parentheses, so resume parsing after the LAST ')' in the line.
     */
    p = strrchr(data, ')');
    if (p != NULL)
        p = stat_field(p + 1, field);
    if (p == NULL)
        goto parse_error;

    if (field == STAT_FIELD_MINFLT)
        value = (long int)strtoul(p, &end, 10); /* unsigned in proc(5) */
    else
        value = strtol(p, &end, 10);
    if (end == p)
        goto parse_error;
    return value;

parse_error:
    fprintf(stderr, "get_pid_stat: unable to parse %s\n", path);
    return 0;
}
/*
 * Free the mem_pool entries [start, start+count) (unless memconfig.dont_free
 * is set) and print the number of minor page faults sampled across the
 * freeing pass, followed by the calling process's RSS.
 * Called from the forked child in spawn_children().
 *
 * start - index of the first mem_pool entry to free.
 * count - number of consecutive entries to free.
 *
 * Returns without doing anything when the range extends past memconfig.allocs.
 */
static void free_chunk(int start, int count)
{
register int i;
int pid = getpid();
long int min_flt = 0, min_flt2 = 0, rss = 0;
if(start + count > memconfig.allocs) return;
printf("Child [%d] freeing chunk [%d - %d], %d bytes\n", pid, start, start+count,
count * memconfig.chunk_size);
int c = 0;
/*
 * The value stored here is overwritten inside the loop before it is read.
 * NOTE(review): the call presumably exists to warm up get_pid_stat() so its
 * own first-use faults are not counted — confirm.
 */
min_flt = get_pid_stat(pid, GET_PAGE_FAULTS);
/*
 * Two passes over the range: in pass 1 (c == 1) the inner loop does nothing,
 * in pass 2 (c == 2) it calls free(). After every pass min_flt takes the
 * previous sample and min_flt2 a fresh one, so once the loop ends
 * (min_flt2 - min_flt) covers pass 2 only.
 */
while(c++ < 2)
{
for(i = start; i < start+count; ++i)
{
if(c == 2 && !memconfig.dont_free) free(mem_pool[i]);
}
min_flt = min_flt2;
min_flt2 = get_pid_stat(pid, GET_PAGE_FAULTS);
}
printf("Faults on free mem for child [%d] = [%ld]\n", pid, min_flt2 - min_flt);
rss = get_pid_stat(pid, GET_RSS);
/* Scale by 2^(page_shift - 10); NOTE(review): assumes rss is a page count and page_shift >= 10. */
printf("IN-memory consumption for child [%d] = [%ld] kb\n", pid, rss << (__PAGE_SHIFT - 10));
if(memconfig.verbose)
malloc_stats();
}
/*
 * Fork memconfig.childs children and hand each one a contiguous slice of
 * mem_pool to free via free_chunk(); then wait for all of them.
 *
 * Every child gets allocs / childs entries; the last child additionally gets
 * the allocs % childs entries left over, so the whole pool is covered.
 * Does nothing when childs or allocs is not positive.
 */
static void spawn_children(void)
{
    int i;
    int objs_per_child;
    int rem_objs;

    if (memconfig.childs <= 0 || memconfig.allocs <= 0)
        return;

    objs_per_child = memconfig.allocs / memconfig.childs;
    /* Remainder of the division above, not of allocs by the quotient. */
    rem_objs = memconfig.allocs % memconfig.childs;

    /*
     * Flush before forking: otherwise any output still sitting in the
     * parent's stdio buffer is inherited by, and printed again from, every
     * child when stdout is not line-buffered (pipe or file).
     */
    fflush(stdout);

    for (i = 0; i < memconfig.childs; ++i) {
        int start = i * objs_per_child;
        int count = objs_per_child;
        pid_t pid;

        if (i + 1 == memconfig.childs)
            count += rem_objs;

        pid = fork();
        if (pid == 0) {
            free_chunk(start, count);
            exit(0);
        }
        if (pid < 0)
            perror("fork"); /* report it and keep going with the next child */
    }

    /* Reap every child; wait() returns -1 once none are left. */
    while (wait(NULL) != -1)
        ;
}
/*
 * Parent-side setup: allocate the pointer table (once), allocate and touch
 * memconfig.allocs chunks of memconfig.chunk_size bytes, print the parent's
 * RSS, and then fork the children that free the chunks.
 */
static void test_malloc(void)
{
    int i = 0;
    int self;
    long int rss_kb;

    if (mem_pool == NULL) {
        mem_pool = calloc(memconfig.allocs, sizeof(*mem_pool));
        assert(mem_pool);
    }

    while (i < memconfig.allocs) {
        mem_pool[i] = malloc(memconfig.chunk_size);
        assert(mem_pool[i]);
        /* Write to the whole chunk so its pages are actually resident. */
        memset(mem_pool[i], 0, memconfig.chunk_size);
        ++i;
    }

    self = getpid();
    rss_kb = get_pid_stat(self, GET_RSS) << (__PAGE_SHIFT - 10);
    printf("RSS for parent [%d] = [%ld] Kb\n", self, rss_kb);

    spawn_children();
}
/*
 * Initialize the page_size and page_shift globals.
 * page_size comes from sysconf(_SC_PAGESIZE); page_shift is the smallest
 * shift for which (1 << shift) is not below page_size.
 */
static void get_page_shift(void)
{
    int shift = 0;

    page_size = sysconf(_SC_PAGESIZE);
    while ((1 << shift) < page_size)
        ++shift;
    page_shift = shift;
}
/*
 * Parse a chunk-size argument such as "4096", "32k", "512K", "1m" or "1g"
 * (an optional trailing 'b'/'B' is accepted, e.g. "32kb") and round it up to
 * a multiple of page_size.
 *
 * s - NUL-terminated size string.
 *
 * Returns the page-rounded size in bytes, or 0 (after printing a diagnostic
 * to stderr) when the string is not a positive decimal number with a known
 * suffix or when the result would not fit in an int.
 */
static int get_chunk_size(const char *s)
{
    char *e = NULL;
    long value;
    long limit;
    int shift = 0;
    int pad = page_size > 0 ? page_size - 1 : 0;
    int chunk_size;

    if (s == NULL)
        goto invalid;

    errno = 0;
    value = strtol(s, &e, 10);
    if (e == s || errno == ERANGE || value <= 0)
        goto invalid;

    /* <ctype.h> functions need an unsigned char value, not a plain char. */
    switch (tolower((unsigned char)*e)) {
    case 'k':
        shift = 10;
        ++e;
        break;
    case 'm':
        shift = 20;
        ++e;
        break;
    case 'g':
        shift = 30;
        ++e;
        break;
    default:
        break;
    }
    if (tolower((unsigned char)*e) == 'b')
        ++e;
    if (*e != '\0')
        goto invalid;

    /*
     * Reject anything whose scaled and page-rounded value would overflow an
     * int; checking before shifting keeps the shift itself well defined.
     */
    limit = (long)INT_MAX - pad;
    if (value > (limit >> shift))
        goto invalid;

    chunk_size = (int)(value << shift);
    /* Round up to the next page boundary (page_size is a power of two). */
    chunk_size += pad;
    chunk_size &= ~pad;
    return chunk_size;

invalid:
    fprintf(stderr, "invalid chunk size '%s'\n", s ? s : "(null)");
    return 0;
}
/* Program name (basename of argv[0]); set at the start of main(). */
static char *prog;

/*
 * Print the option summary to stderr and terminate the process with
 * exit status 1. Never returns.
 */
static void usage(void)
{
    static const char option_help[] =
        "-h\t\t usage\n"
        "-a\t\t number of allocations\n"
        "-s\t\t chunk size to allocate (examples: 32k, 512k, 1m)\n"
        "-c\t\t children to spawn for freeing chunks\n"
        "-v\t\t verbose mode that dumps malloc_stats\n"
        "-d\t\t dont free the chunks in the child\n";

    fprintf(stderr, "%s [OPTION]\n%s", prog, option_help);
    exit(1);
}
/*
 * Parse a non-negative decimal count for the -a / -c options.
 *
 * s   - option argument.
 * out - receives the parsed value on success.
 *
 * Returns 0 on success, -1 when s is not a plain decimal number, is negative,
 * or does not fit in an int. A value of 0 is accepted; main() treats it as
 * "use the default".
 */
static int parse_count(const char *s, int *out)
{
    char *end = NULL;
    long v;

    errno = 0;
    v = strtol(s, &end, 10);
    if (end == s || *end != '\0' || errno == ERANGE || v < 0 || v > INT_MAX)
        return -1;
    *out = (int)v;
    return 0;
}

/*
 * Entry point: parse options into memconfig, fill in defaults, print the
 * effective configuration and run the allocate/fork/free experiment.
 *
 * Options: -c children, -a allocations, -s chunk size, -v verbose,
 * -d don't free in the child, -h help. Any invalid option or argument
 * prints the usage text and exits with status 1.
 */
int main(int argc, char **argv)
{
    int c;
    char *slash;
    long long total_bytes;

    opterr = 0;
    /* Must run before option parsing: -s rounds to the page size. */
    get_page_shift();

    slash = strrchr(argv[0], '/');
    prog = slash ? slash + 1 : argv[0];

    /* getopt() signals the end of the options with -1. */
    while ((c = getopt(argc, argv, "c:a:s:vdh")) != -1) {
        switch (c) {
        case 'c':
            if (parse_count(optarg, &memconfig.childs) != 0)
                usage();
            break;
        case 's':
            memconfig.chunk_size = get_chunk_size(optarg);
            if (memconfig.chunk_size < 0)
                usage();
            break;
        case 'a':
            if (parse_count(optarg, &memconfig.allocs) != 0)
                usage();
            break;
        case 'v':
            memconfig.verbose = 1;
            break;
        case 'd':
            memconfig.dont_free = 1;
            break;
        case '?':
        case 'h':
        default:
            usage();
            break;
        }
    }
    if (optind != argc)
        usage();

    /* Zero means "not given" (or given as 0): fall back to the defaults. */
    if (!memconfig.allocs)
        memconfig.allocs = NUM_ALLOCS_DEFAULT;
    if (!memconfig.chunk_size)
        memconfig.chunk_size = CHUNK_SIZE_DEFAULT;
    if (!memconfig.childs)
        memconfig.childs = NUM_CHILDS_DEFAULT;
    /* More children than chunks would leave children with nothing to free. */
    if (memconfig.childs > memconfig.allocs)
        memconfig.childs = memconfig.allocs;

    /* Widen before multiplying: allocs * chunk_size can exceed INT_MAX. */
    total_bytes = (long long)memconfig.allocs * memconfig.chunk_size;
    printf("Page size [%d], childs [%d], chunk size [%d], allocs [%d], memory allocated [%lld] bytes\n",
           page_size, memconfig.childs, memconfig.chunk_size, memconfig.allocs, total_bytes);

    test_malloc();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment