Skip to content

Instantly share code, notes, and snippets.

@huyang531

huyang531/main.c Secret

Created July 17, 2024 00:17
Show Gist options
  • Save huyang531/28a31fc2d9f348fafb5b9d8e6c9493d5 to your computer and use it in GitHub Desktop.
Save huyang531/28a31fc2d9f348fafb5b9d8e6c9493d5 to your computer and use it in GitHub Desktop.
cache-scratch: A Multi-threaded Memory Allocation Stress Test Ported to Unikraft
#include <stdio.h>
/* Import user configuration: */
#ifdef __Unikraft__
#include <uk/config.h>
#endif /* __Unikraft__ */
/////////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
// for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// Copyright (c) 1998-2003, The University of Texas at Austin.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////
/**
* @file cache-scratch.c
*
* cache-scratch is a benchmark that exercises a heap's cache-locality.
* An allocator that allows multiple threads to re-use the same small
* object (possibly all in one cache-line) will scale poorly, while
* an allocator like Hoard will exhibit near-linear scaling.
*
* Try the following (on a P-processor machine):
*
* cache-scratch 1 1000 1 1000000
* cache-scratch P 1000 1 1000000
*
* cache-scratch-hoard 1 1000 1 1000000
* cache-scratch-hoard P 1000 1 1000000
*
* The ideal is a P-fold speedup.
*/
#include <uk/assert.h>
#include <uk/print.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <stdint.h>
/* NOTE(review): feature-test macros normally have to be defined before the
 * first system header is included to take effect; confirm the build also
 * passes -D_GNU_SOURCE. */
#define _GNU_SOURCE
#include <sched.h>
#include <pthread.h>
/////////// Helper variables ///////////
/*
 * Capacity, in bytes, of the private data segment managed by mem_init() and
 * mem_sbrk(): 256 MiB. The expansion is parenthesized so that the macro is
 * safe inside any surrounding expression (e.g. `x % DSEG_MAX`).
 */
#define DSEG_MAX (256*1024*1024)
/*
 * Bounds of the private data segment. mem_init() sets dseg_lo to the first
 * usable byte and dseg_hi to dseg_lo - 1; mem_sbrk() advances dseg_hi.
 */
char *dseg_lo = NULL, *dseg_hi = NULL;
long dseg_size; /* Maximum size of data segment */
/* System page size in bytes; assigned by mem_init(), zero until then. */
static int page_size;
/* Align pointer to closest page boundary downwards */
#define PAGE_ALIGN(p) \
    ((void *)(((uintptr_t)(p) / (uintptr_t)page_size) * (uintptr_t)page_size))
/* Align pointer to closest page boundary upwards */
#define PAGE_ALIGN_UP(p) \
    ((void *)((((uintptr_t)(p) + (uintptr_t)page_size - 1) / (uintptr_t)page_size) \
              * (uintptr_t)page_size))
///////// Helper function declarations //////////
/* Elapsed time from *start to *end, in seconds. */
double timespec_diff(struct timespec *start, struct timespec *end);
/* Initialize *attr and apply the detach state, the contention scope and,
 * when inheritsched is PTHREAD_EXPLICIT_SCHED, the policy and priority. */
void initialize_pthread_attr(int detachstate, int schedpolicy, int priority,
int inheritsched, int scope, pthread_attr_t *attr);
/* Processor count as reported by sysconf(_SC_NPROCESSORS_ONLN). */
int getNumProcessors(void);
/* Restrict the calling thread's CPU affinity to CPU n. */
void setCPU (int n);
/**
* TODO: Use these custom memory management functions to collect more stats.
*/
/* Allocate the private data segment; returns 0 on success, -1 on failure. */
int mem_init (void);
/* Reserve `increment` more bytes of the private data segment. */
void *mem_sbrk (ptrdiff_t increment);
/* Page size recorded by mem_init(). */
int mem_pagesize (void);
/* Difference between the segment's high and low marks, in bytes. */
ptrdiff_t mem_usage (void);
// Arguments handed to one worker thread. main() heap-allocates one instance
// per thread; worker() frees both the instance and the object it points to.
struct workerArg {
char * _object; // Object allocated by main(); worker() frees it before its loop.
int _objSize; // Size in bytes of each object worker() allocates.
int _iterations; // Number of malloc / write / free rounds worker() performs.
int _repetitions; // Number of full write passes over the object per round.
int _cpu; // CPU index worker() passes to setCPU().
};
/**
 * Thread entry point for one benchmark worker.
 *
 * Pins the calling thread to w->_cpu, frees the object handed over by the
 * creator, then performs w->_iterations rounds of: allocate an object of
 * w->_objSize bytes, write every byte of it w->_repetitions times, free it.
 *
 * @param arg  Heap-allocated struct workerArg. This function takes ownership
 *             and frees both the struct and the object it references.
 * @return     Always NULL.
 */
extern void * worker (void * arg)
{
    struct workerArg * w = (struct workerArg *) arg;

    setCPU(w->_cpu);

    /* Release the object we were given; it may be reused by our own mallocs. */
    free(w->_object);
    w->_object = NULL;

    uk_pr_debug("Worker thread %d starting to iterate %d times\n", w->_cpu, w->_iterations);

    for (int i = 0; i < w->_iterations; i++) {
        // Allocate the object.
        char * obj = (char *)malloc(w->_objSize);

        /* A NULL result is only an error for a non-zero request; writing
         * through it would crash, so stop this worker instead. */
        if (obj == NULL && w->_objSize > 0) {
            fprintf(stderr, "Worker thread %d: malloc of %d bytes failed\n",
                    w->_cpu, w->_objSize);
            free(w);
            return NULL;
        }

        // Write into it a bunch of times.
        for (int j = 0; j < w->_repetitions; j++) {
            for (int k = 0; k < w->_objSize; k++) {
                obj[k] = (char) k;
                /* Read the byte back through a volatile local, presumably
                 * so the compiler cannot discard the access. */
                volatile char ch = obj[k];
                ch++;
            }
        }

        // Free the object.
        free(obj);
    }

    uk_pr_debug("Worker thread %d finished iterations\n", w->_cpu);
    free(w);
    return NULL;
}
/*
 * Parse one decimal command-line argument.
 *
 * Stores the value in *out and returns 0 when `text` is a complete base-10
 * integer in the range [min_value, INT_MAX]; returns -1 otherwise and leaves
 * *out untouched.
 */
static int parse_int_arg(const char *text, int min_value, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE)
        return -1;
    if (value < min_value || value > INT_MAX)
        return -1;

    *out = (int)value;
    return 0;
}

/**
 * Benchmark driver.
 *
 * Usage: <prog> nthreads iterations objSize repetitions
 *
 * Allocates one object per thread, starts nthreads workers (each given
 * repetitions / nthreads write passes per iteration), waits for all of them
 * and prints the elapsed wall-clock time and the value of mem_usage().
 *
 * @return 0 on success, 1 on a usage error, allocation failure or thread
 *         creation failure.
 */
int main (int argc, char * argv[]) {
    int nthreads = 0;
    int iterations = 0;
    int objSize = 0;
    int repetitions = 0;
    pthread_attr_t attr;
    int attr_ready = 0;
    pthread_t *threads = NULL;
    char **objs = NULL;
    int numCPU;
    int created = 0;
    int status = 0;
    int i;
    struct timespec start_time = {0};
    struct timespec end_time = {0};

    /* nthreads and objSize must be positive: nthreads is used as a divisor
     * and an array length, objSize as an allocation size. */
    if (argc <= 4
        || parse_int_arg(argv[1], 1, &nthreads) != 0
        || parse_int_arg(argv[2], 0, &iterations) != 0
        || parse_int_arg(argv[3], 1, &objSize) != 0
        || parse_int_arg(argv[4], 0, &repetitions) != 0) {
        fprintf (stderr, "Usage: %s nthreads iterations objSize repetitions\n",
                 argc > 0 ? argv[0] : "cache-scratch");
        return 1;
    }

    /* The CPU count is used as a modulus below, so it must be at least 1. */
    numCPU = getNumProcessors();
    if (numCPU < 1)
        numCPU = 1;

    /* Heap-allocate the bookkeeping arrays; nthreads is user-controlled and
     * could otherwise overflow the stack. */
    threads = calloc((size_t)nthreads, sizeof *threads);
    objs = calloc((size_t)nthreads, sizeof *objs);
    if (threads == NULL || objs == NULL) {
        fprintf(stderr, "Out of memory allocating state for %d threads\n", nthreads);
        status = 1;
        goto cleanup;
    }

    // Allocate nthreads objects and distribute them among the threads.
    for (i = 0; i < nthreads; i++) {
        objs[i] = malloc((size_t)objSize);
        if (objs[i] == NULL) {
            fprintf(stderr, "Out of memory allocating object %d (%d bytes)\n", i, objSize);
            status = 1;
            goto cleanup;
        }
    }

    initialize_pthread_attr(PTHREAD_CREATE_JOINABLE, SCHED_RR, -10, PTHREAD_EXPLICIT_SCHED,
                            PTHREAD_SCOPE_SYSTEM, &attr);
    attr_ready = 1;

    /* Get the starting time */
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) != 0)
        perror("clock_gettime");

    for (i = 0; i < nthreads; i++) {
        struct workerArg * w = malloc(sizeof *w);
        int rc;

        if (w == NULL) {
            fprintf(stderr, "Out of memory allocating arguments for worker %d\n", i);
            status = 1;
            break;
        }
        w->_object = objs[i];
        w->_objSize = objSize;
        w->_repetitions = repetitions / nthreads;
        w->_iterations = iterations;
        w->_cpu = (i+1)%numCPU;

        rc = pthread_create(&threads[i], &attr, &worker, (void *)w);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for worker %d: %s\n", i, strerror(rc));
            free(w);
            status = 1;
            break;
        }

        /* The worker now owns (and frees) both w and the object. */
        objs[i] = NULL;
        created++;
        uk_pr_debug("Created worker thread %d\n", i);
    }

    /* Join only the threads that were actually started. */
    for (i = 0; i < created; i++) {
        uk_pr_debug("Waiting for worker thread %d\n", i);
        pthread_join(threads[i], NULL);
        uk_pr_debug("Worker thread %d finished\n", i);
    }

    /* Get the finish time */
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time) != 0)
        perror("clock_gettime");

    if (status == 0) {
        double t = timespec_diff(&start_time, &end_time);

        printf ("Time elapsed = %f seconds\n", t);
        /* mem_usage() returns ptrdiff_t, whose conversion specifier is %td. */
        printf ("Memory used = %td bytes\n", mem_usage());
    }

cleanup:
    if (attr_ready)
        pthread_attr_destroy(&attr);
    if (objs != NULL) {
        /* Only objects never handed to a worker are still non-NULL here. */
        for (i = 0; i < nthreads; i++)
            free(objs[i]);
    }
    free(objs);
    free(threads);
    return status;
}
/////////////// Following are helper functions ////////////////////
/*
 * Compute the time elapsed between two timestamps.
 *
 * Returns (*end - *start) in seconds. When the nanosecond difference is
 * negative, one second is borrowed from the seconds difference; if there is
 * no whole second to borrow from (seconds difference is zero) the result is
 * reported as 0.0.
 */
double timespec_diff(struct timespec *start, struct timespec *end) {
    time_t whole = end->tv_sec - start->tv_sec;
    long frac = end->tv_nsec - start->tv_nsec;

    if (frac < 0) {
        if (whole == 0)
            return 0.0;

        /* Borrow one second and express it in nanoseconds. */
        whole -= 1;
        frac += 1000000000L;
    }

    return (double)whole + (double)frac / 1000000000.0;
}
/*
 * Initialize a thread attribute object and populate it.
 *
 * The detach state and contention scope are always applied. The scheduling
 * policy and priority are applied only when inheritsched equals
 * PTHREAD_EXPLICIT_SCHED. Return codes of the pthread_attr_* calls are not
 * inspected.
 *
 * NOTE(review): inheritsched is only used as a guard here; it is never
 * stored in *attr - confirm whether that is intended.
 */
void initialize_pthread_attr(int detachstate, int schedpolicy, int priority,
                             int inheritsched, int scope, pthread_attr_t *attr)
{
    const int wants_explicit = (inheritsched == PTHREAD_EXPLICIT_SCHED);

    pthread_attr_init(attr);
    pthread_attr_setdetachstate(attr, detachstate);

    if (wants_explicit) {
        struct sched_param param;

        param.sched_priority = priority;
        /* Policy first, then the priority that goes with it. */
        pthread_attr_setschedpolicy(attr, schedpolicy);
        pthread_attr_setschedparam(attr, &param);
    }

    pthread_attr_setscope(attr, scope);
}
/*
 * Return the number of processors, as reported by
 * sysconf(_SC_NPROCESSORS_ONLN), caching the answer after the first call.
 *
 * The result is always at least 1: if sysconf() fails (returns -1) or
 * reports 0, the function falls back to 1 so that callers can safely use
 * the value as a divisor or modulus.
 *
 * This function should be more complicated to try and avoid a call to the
 * C library malloc() routine embedded in the Linux sysconf() call.
 * However, here we can allow a call to malloc() before the
 * main test starts.
 */
int getNumProcessors (void)
{
    static int np = 0;

    if (np <= 0) {
        long online = sysconf(_SC_NPROCESSORS_ONLN);

        np = (online > 0) ? (int)online : 1;
    }
    return np;
}
/*
 * Restrict the calling thread to CPU n.
 *
 * Builds an affinity mask containing only CPU n and applies it to the
 * thread id returned by gettid(). A failure is reported with perror() and
 * otherwise ignored.
 */
void setCPU (int n) {
    cpu_set_t only_n;
    pid_t self;

    CPU_ZERO(&only_n);
    CPU_SET(n, &only_n);

    self = gettid();
    if (sched_setaffinity(self, sizeof(cpu_set_t), &only_n) == 0)
        return;

    perror("sched_setaffinity failed");
}
/*
 * Set up the private data segment.
 *
 * Records the system page size, allocates DSEG_MAX bytes plus two pages
 * with malloc(), rounds the start of the block up to a page boundary and
 * marks the segment as empty (dseg_hi = dseg_lo - 1).
 *
 * The pointer originally returned by malloc() is not retained.
 *
 * Returns 0 on success, or -1 if the allocation fails (dseg_lo is NULL in
 * that case).
 */
int mem_init (void)
{
    char *raw;

    /* Get system page size */
    page_size = (int) getpagesize();

    /* Allocate heap */
    raw = (char *) malloc(DSEG_MAX + 2*page_size);
    dseg_lo = raw;
    if (raw == NULL)
        return -1;

    /* Align heap to the next page boundary; nothing is in use yet. */
    dseg_lo = (char *) PAGE_ALIGN_UP(raw);
    dseg_hi = dseg_lo - 1;
    dseg_size = DSEG_MAX;

    return 0;
}
void *mem_sbrk (ptrdiff_t increment)
{
char *new_hi = dseg_hi + increment;
char *old_hi = dseg_hi;
long dseg_cursize = dseg_hi - dseg_lo + 1;
UK_ASSERT(increment > 0);
/* Resize data segment, if the memory is available */
if (new_hi > dseg_lo + dseg_size)
return NULL;
dseg_hi = new_hi;
dseg_cursize = dseg_hi - dseg_lo + 1;
return (void *)(old_hi + 1);
}
/*
 * Report the page size recorded in page_size.
 * The value is whatever mem_init() last stored there.
 */
int mem_pagesize (void)
{
    const int recorded = page_size;

    return recorded;
}
/*
 * Report the distance, in bytes, between the segment's low and high marks.
 *
 * Returns 0 when no low mark has been recorded (dseg_lo is NULL), instead of
 * subtracting null pointers. When a low mark exists but no high mark has
 * been recorded yet, the current program break from sbrk(0) is adopted as
 * the high mark (the original "hack for libc"); if sbrk() fails, 0 is
 * returned and the high mark is left unset.
 *
 * NOTE(review): the result is dseg_hi - dseg_lo with no "+ 1" - confirm
 * which convention for dseg_hi callers expect.
 */
ptrdiff_t mem_usage (void)
{
    if (dseg_lo == NULL)
        return 0;

    /* hack for libc */
    if (dseg_hi == NULL) {
        void *current_break = sbrk(0);

        if (current_break == (void *) -1)
            return 0;
        dseg_hi = current_break;
    }

    return dseg_hi - dseg_lo;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment