huyang531/main.c Secret

## main.c
/*
 * Must be defined before the first system header is included, otherwise
 * glibc does not expose gettid(), cpu_set_t, CPU_SET() or sched_setaffinity().
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <errno.h>
#include <limits.h>
#include <stdio.h>

/* Import user configuration: */
#ifdef __Unikraft__
#include <uk/config.h>
#endif /* __Unikraft__ */

/////////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// Copyright (c) 1998-2003, The University of Texas at Austin.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////

/**
 * @file cache-scratch.c
 *
 * cache-scratch is a benchmark that exercises a heap's cache-locality.
 * An allocator that allows multiple threads to re-use the same small
 * object (possibly all in one cache-line) will scale poorly, while
 * an allocator like Hoard will exhibit near-linear scaling.
 *
 * Try the following (on a P-processor machine):
 *
 *  cache-scratch 1 1000 1 1000000
 *  cache-scratch P 1000 1 1000000
 *
 *  cache-scratch-hoard 1 1000 1 1000000
 *  cache-scratch-hoard P 1000 1 1000000
 *
 *  The ideal is a P-fold speedup.
*/

#include <uk/assert.h>
#include <uk/print.h>

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>

#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <stdio.h>
#include <unistd.h>

#include <time.h>
#include <stdint.h>

#define _GNU_SOURCE
#include <sched.h>
#include <pthread.h>


/////////// Helper variables ///////////
/* 256 MiB; parenthesised so the macro stays one operand inside larger expressions */
#define DSEG_MAX (256*1024*1024)
/* First byte of the private data segment / last byte currently handed out */
char *dseg_lo = NULL, *dseg_hi = NULL;
long dseg_size;  /* Maximum size of data segment */

/* System page size, cached by mem_init(); 0 until mem_init() has run */
static int page_size;

/* Align pointer to closest page boundary downwards */
#define PAGE_ALIGN(p)    ((void *)(((uintptr_t)(p) / (uintptr_t)page_size) * (uintptr_t)page_size))
/* Align pointer to closest page boundary upwards */
#define PAGE_ALIGN_UP(p) ((void *)((((uintptr_t)(p) + (uintptr_t)page_size - 1) / (uintptr_t)page_size) * (uintptr_t)page_size))


///////// Helper function declarations //////////
/* Elapsed time from *start to *end, in seconds. */
double timespec_diff(struct timespec *start, struct timespec *end);
/* Initialise *attr and apply the requested detach state, scheduling and scope. */
void initialize_pthread_attr(int detachstate, int schedpolicy, int priority,
				    int inheritsched, int scope, pthread_attr_t *attr);
/* Number of online processors as reported by sysconf(); cached after the first call. */
int getNumProcessors(void);
/* Restrict the calling thread to CPU n. */
void setCPU (int n);
/**
 * Helpers managing a private, malloc()-backed data segment.
 * TODO: Use these custom memory management functions to collect more stats.
 */
int mem_init (void);
void *mem_sbrk (ptrdiff_t increment);
int mem_pagesize (void);
ptrdiff_t mem_usage (void);

// This struct just holds arguments to each thread.
// main() heap-allocates one instance per thread; worker() frees it.
struct workerArg {
  char * _object;    // object allocated by main(); worker() frees it before its loop
  int _objSize;      // size in bytes passed to malloc() on every iteration
  int _iterations;   // number of malloc/write/free rounds
  int _repetitions;  // number of full write passes over the object per round
  int _cpu;          // CPU index handed to setCPU()
};

/*
 * Thread entry point of the benchmark.
 *
 * arg points to a heap-allocated struct workerArg. The worker takes ownership
 * of it and of the object it carries: both are freed here.
 *
 * After pinning itself with setCPU() and freeing the object it was given, the
 * worker performs _iterations rounds of: malloc an _objSize-byte object, write
 * and read back every byte _repetitions times, free it. The loop stops early
 * if an allocation fails. Always returns NULL.
 */
extern void * worker (void * arg)
{
  int i, j, k; /* Loop control variables */

  struct workerArg * w = (struct workerArg *) arg;
  setCPU(w->_cpu);

  // Give back the object that was allocated by the creating thread.
  free(w->_object);

  uk_pr_debug("Worker thread %d starting to iterate %d times\n", w->_cpu, w->_iterations);
  for (i = 0; i < w->_iterations; i++) {
    // Allocate the object.
    char * obj = (char *)malloc(w->_objSize);
    if (obj == NULL) {
      // malloc(0) may legitimately return NULL; only a non-empty request
      // that comes back NULL is an error.
      if (w->_objSize > 0) {
        fprintf(stderr, "Worker thread %d: malloc(%d) failed\n", w->_cpu, w->_objSize);
        break;
      }
      continue;
    }
    // Write into it a bunch of times.
    for (j = 0; j < w->_repetitions; j++) {
      for (k = 0; k < w->_objSize; k++) {
        obj[k] = (char) k;
        // The volatile read-back keeps the compiler from dropping the access.
        volatile char ch = obj[k];
        ch++;
      }
    }
    // Free the object.
    free(obj);
  }
  uk_pr_debug("Worker thread %d finished iterations\n", w->_cpu);
  free(w);

  return NULL;
}


/*
 * Parse text as a base-10 int that is at least min_value.
 * Returns 0 and stores the result in *out on success; returns -1 when text
 * is empty, has trailing characters, is out of int range, or is below
 * min_value.
 */
static int parse_int_arg(const char *text, int min_value, int *out)
{
	char *end = NULL;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE)
		return -1;
	if (value < min_value || value > INT_MAX)
		return -1;

	*out = (int)value;
	return 0;
}

/*
 * Benchmark driver.
 *
 * Usage: <prog> nthreads iterations objSize repetitions
 *
 * Allocates one objSize-byte object per thread from the main thread, hands
 * each object to a worker pinned to CPU (i+1) % numCPU, waits for all
 * workers, then prints the elapsed wall-clock time and mem_usage().
 *
 * Returns 0 on success, 1 on bad arguments, allocation failure or thread
 * creation failure.
 */
int main (int argc, char * argv[]) {
	int nthreads = 0;
	int iterations = 0;
	int objSize = 0;
	int repetitions = 0;
	pthread_attr_t attr;
	pthread_t *threads;
	int numCPU;
	int started = 0;	/* number of threads actually created */
	int status = 0;
	int i;
	char **objs;
	struct timespec start_time = {0};
	struct timespec end_time = {0};

	/* nthreads is later used as a divisor and objSize as an allocation
	 * size, so both must be at least 1. */
	if (argc <= 4
	    || parse_int_arg(argv[1], 1, &nthreads) != 0
	    || parse_int_arg(argv[2], 0, &iterations) != 0
	    || parse_int_arg(argv[3], 1, &objSize) != 0
	    || parse_int_arg(argv[4], 0, &repetitions) != 0) {
		fprintf (stderr, "Usage: %s nthreads iterations objSize repetitions\n",
			 argc > 0 ? argv[0] : "cache-scratch");
		return 1;
	}

	/* numCPU is used as a modulus below; never let it be zero or negative. */
	numCPU = getNumProcessors();
	if (numCPU < 1)
		numCPU = 1;

	/* Heap-allocate the thread handles: a VLA sized by user input could
	 * overflow the stack. */
	threads = calloc((size_t)nthreads, sizeof *threads);
	objs = calloc((size_t)nthreads, sizeof *objs);
	if (threads == NULL || objs == NULL) {
		fprintf(stderr, "Out of memory allocating bookkeeping arrays\n");
		free(threads);
		free(objs);
		return 1;
	}

	// Allocate nthreads objects and distribute them among the threads.
	for (i = 0; i < nthreads; i++) {
		objs[i] = malloc((size_t)objSize);
		if (objs[i] == NULL) {
			fprintf(stderr, "Out of memory allocating object %d\n", i);
			while (i-- > 0)
				free(objs[i]);
			free(objs);
			free(threads);
			return 1;
		}
	}

	initialize_pthread_attr(PTHREAD_CREATE_JOINABLE, SCHED_RR, -10, PTHREAD_EXPLICIT_SCHED,
				PTHREAD_SCOPE_SYSTEM, &attr);

	/* Get the starting time */
	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);

	for (i = 0; i < nthreads; i++) {
		struct workerArg *w = malloc(sizeof *w);
		int rc;

		if (w == NULL) {
			fprintf(stderr, "Out of memory creating worker %d\n", i);
			status = 1;
			break;
		}
		w->_object = objs[i];
		w->_objSize = objSize;
		w->_repetitions = repetitions / nthreads;
		w->_iterations = iterations;
		w->_cpu = (i+1)%numCPU;

		rc = pthread_create(&threads[i], &attr, &worker, w);
		if (rc != 0) {
			fprintf(stderr, "pthread_create failed for worker %d: %s\n",
				i, strerror(rc));
			free(w);
			status = 1;
			break;
		}
		started++;
		uk_pr_debug("Created worker thread %d\n", i);
	}

	/* Only join threads that were really created. */
	for (i = 0; i < started; i++) {
		uk_pr_debug("Waiting for worker thread %d\n", i);
		pthread_join(threads[i], NULL);
		uk_pr_debug("Worker thread %d finished\n", i);
	}

	/* Get the finish time */
	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);

	double t = timespec_diff(&start_time, &end_time);

	/* Objects that never reached a worker are still owned by main(). */
	for (i = started; i < nthreads; i++)
		free(objs[i]);
	free(objs);
	free(threads);
	pthread_attr_destroy(&attr);

	printf ("Time elapsed = %f seconds\n", t);
	/* mem_usage() returns ptrdiff_t, whose printf length modifier is 't'. */
	printf ("Memory used = %td bytes\n", mem_usage());
	return status;
}

/////////////// Following are helper functions ////////////////////
/*
 * Return the time from *start to *end in seconds.
 *
 * When the nanosecond difference is negative, one second is borrowed from the
 * seconds difference; if there is no whole second to borrow from (seconds
 * difference is exactly zero) the result is 0.0.
 */
double timespec_diff(struct timespec *start, struct timespec *end) {
	time_t whole = end->tv_sec - start->tv_sec;
	long frac = end->tv_nsec - start->tv_nsec;

	if (frac < 0) {
		if (whole == 0)
			return 0.0;
		/* Borrow one second and express it in nanoseconds */
		whole--;
		frac += 1000000000L;
	}

	return (double)whole + (double)frac / 1000000000.0;
}

/*
 * Initialise *attr and configure it for thread creation.
 *
 * The detach state and contention scope are always applied. The scheduling
 * policy and priority are applied only when inheritsched equals
 * PTHREAD_EXPLICIT_SCHED; inheritsched itself is not stored into *attr.
 * Return values of the pthread_attr_* calls are not inspected.
 */
void initialize_pthread_attr(int detachstate, int schedpolicy, int priority,
				    int inheritsched, int scope, pthread_attr_t *attr)
{
	const int wants_explicit = (inheritsched == PTHREAD_EXPLICIT_SCHED);

	pthread_attr_init(attr);
	pthread_attr_setdetachstate(attr, detachstate);

	if (wants_explicit) {
		struct sched_param param;

		param.sched_priority = priority;
		/* Policy first, then the priority that goes with it */
		pthread_attr_setschedpolicy(attr, schedpolicy);
		pthread_attr_setschedparam(attr, &param);
	}

	pthread_attr_setscope(attr, scope);
}

/*
 * Return the number of online processors, never less than 1.
 *
 * The value comes from sysconf(_SC_NPROCESSORS_ONLN) and is cached after the
 * first call. sysconf() reports failure as -1; because callers use the
 * result as a modulus, a failed or non-positive reading is replaced by 1.
 *
 * This function should be more complicated to try and avoid a call to the
 * C library malloc() routine embedded in the Linux sysconf() call.
 * However, here we can allow a call to malloc() before the
 * main test starts.
 */
int getNumProcessors (void)
{
	static int np = 0;

	if (np == 0) {
		long online = sysconf(_SC_NPROCESSORS_ONLN);

		np = (online < 1) ? 1 : (int)online;
	}
	return np;
}

/*
 * Pin the calling thread to CPU n only.
 * A failing sched_setaffinity() is reported with perror() and otherwise
 * ignored.
 */
void setCPU (int n) {
	cpu_set_t only_n;
	const pid_t self = gettid();

	CPU_ZERO(&only_n);
	CPU_SET(n, &only_n);

	if (sched_setaffinity(self, sizeof(cpu_set_t), &only_n) == 0) {
		return;
	}
	perror("sched_setaffinity failed");
}


/*
 * Set up the private data segment.
 *
 * Caches the system page size, obtains DSEG_MAX bytes plus two pages from
 * malloc(), rounds the start up to a page boundary and marks the segment as
 * empty (dseg_hi sits one byte below dseg_lo).
 *
 * Returns 0 on success, -1 if the allocation fails (dseg_lo is NULL then).
 */
int mem_init (void)
{
    char *raw;

    /* Remember the system page size for the alignment macros */
    page_size = (int) getpagesize();

    /* Two extra pages leave room for rounding the start upwards */
    raw = (char *) malloc(DSEG_MAX + 2*page_size);
    if (raw == NULL) {
        dseg_lo = NULL;
        return -1;
    }

    /* Segment starts at the next page boundary and is initially empty */
    dseg_lo = (char *) PAGE_ALIGN_UP(raw);
    dseg_hi = dseg_lo - 1;
    dseg_size = DSEG_MAX;

    return 0;
}

void *mem_sbrk (ptrdiff_t increment)
{
    char *new_hi = dseg_hi + increment;
    char *old_hi = dseg_hi;
    long dseg_cursize = dseg_hi - dseg_lo + 1;

    UK_ASSERT(increment > 0);

    /* Resize data segment, if the memory is available */
    if (new_hi > dseg_lo + dseg_size)
        return NULL;
    dseg_hi = new_hi;
    dseg_cursize = dseg_hi - dseg_lo + 1;

    return (void *)(old_hi + 1);
}

/* Return the page size cached by mem_init(); 0 if mem_init() has not run. */
int mem_pagesize (void)
{
    return page_size;
}

/*
 * Return dseg_hi - dseg_lo, the distance between the two segment markers.
 *
 * If a segment start is known but the top marker is still NULL, the top is
 * first taken from the current program break (sbrk(0)).
 * NOTE(review): mem_sbrk() treats dseg_hi as the last byte in use, so right
 * after mem_init() this yields -1 rather than 0 - confirm the intended
 * convention.
 */
ptrdiff_t mem_usage (void)
{
  const int top_unknown = (dseg_hi == NULL);

  /* hack for libc */
  if (top_unknown && dseg_lo != NULL)
    dseg_hi = sbrk(0);

  return dseg_hi - dseg_lo;
}
	#include <stdio.h>

	/* Import user configuration: */
	#ifdef __Unikraft__
	#include <uk/config.h>
	#endif /* __Unikraft__ */

	/////////////////////////////////////////////////////////////////////
	//
	// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
	// for Shared-Memory Multiprocessors
	// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
	//
	// Copyright (c) 1998-2003, The University of Texas at Austin.
	//
	// This library is free software; you can redistribute it and/or modify
	// it under the terms of the GNU Library General Public License as
	// published by the Free Software Foundation, http://www.fsf.org.
	//
	// This library is distributed in the hope that it will be useful, but
	// WITHOUT ANY WARRANTY; without even the implied warranty of
	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	// Library General Public License for more details.
	//
	//////////////////////////////////////////////////////////////////////////////

	/**
	* @file cache-scratch.c
	*
	* cache-scratch is a benchmark that exercises a heap's cache-locality.
	* An allocator that allows multiple threads to re-use the same small
	* object (possibly all in one cache-line) will scale poorly, while
	* an allocator like Hoard will exhibit near-linear scaling.
	*
	* Try the following (on a P-processor machine):
	*
	* cache-scratch 1 1000 1 1000000
	* cache-scratch P 1000 1 1000000
	*
	* cache-scratch-hoard 1 1000 1 1000000
	* cache-scratch-hoard P 1000 1 1000000
	*
	* The ideal is a P-fold speedup.
	*/

	#include <uk/assert.h>
	#include <uk/print.h>

	#include <stdio.h>
	#include <stdlib.h>
	#include <stddef.h>

	#include <sys/types.h>
	#include <sys/stat.h>
	#include <fcntl.h>
	#include <string.h>

	#include <sys/syscall.h>
	#include <sys/sysinfo.h>
	#include <stdio.h>
	#include <unistd.h>

	#include <time.h>
	#include <stdint.h>

	#define _GNU_SOURCE
	#include <sched.h>
	#include <pthread.h>


	/////////// Helper variables ///////////
	/* 256 MiB; parenthesised so the macro stays one operand inside larger expressions */
	#define DSEG_MAX (256*1024*1024)
	/* First byte of the private data segment / last byte currently handed out */
	char *dseg_lo = NULL, *dseg_hi = NULL;
	long dseg_size; /* Maximum size of data segment */

	/* System page size, cached by mem_init(); 0 until mem_init() has run */
	static int page_size;

	/* Align pointer to closest page boundary downwards */
	#define PAGE_ALIGN(p) ((void *)(((uintptr_t)(p) / (uintptr_t)page_size) * (uintptr_t)page_size))
	/* Align pointer to closest page boundary upwards */
	#define PAGE_ALIGN_UP(p) ((void *)((((uintptr_t)(p) + (uintptr_t)page_size - 1) / (uintptr_t)page_size) * (uintptr_t)page_size))


	///////// Helper function declarations //////////
	/* Elapsed time from *start to *end, in seconds. */
	double timespec_diff(struct timespec *start, struct timespec *end);
	/* Initialise *attr and apply the requested detach state, scheduling and scope. */
	void initialize_pthread_attr(int detachstate, int schedpolicy, int priority,
				     int inheritsched, int scope, pthread_attr_t *attr);
	/* Number of online processors as reported by sysconf(); cached after the first call. */
	int getNumProcessors(void);
	/* Restrict the calling thread to CPU n. */
	void setCPU (int n);
	/**
	 * Helpers managing a private, malloc()-backed data segment.
	 * TODO: Use these custom memory management functions to collect more stats.
	 */
	int mem_init (void);
	void *mem_sbrk (ptrdiff_t increment);
	int mem_pagesize (void);
	ptrdiff_t mem_usage (void);

	// This struct just holds arguments to each thread.
	// main() heap-allocates one instance per thread; worker() frees it.
	struct workerArg {
	char * _object; // object allocated by main(); worker() frees it before its loop
	int _objSize; // size in bytes passed to malloc() on every iteration
	int _iterations; // number of malloc/write/free rounds
	int _repetitions; // number of full write passes over the object per round
	int _cpu; // CPU index handed to setCPU()
	};

	/*
	 * Thread entry point of the benchmark.
	 *
	 * arg points to a heap-allocated struct workerArg. The worker takes
	 * ownership of it and of the object it carries: both are freed here.
	 *
	 * After pinning itself with setCPU() and freeing the object it was given,
	 * the worker performs _iterations rounds of: malloc an _objSize-byte
	 * object, write and read back every byte _repetitions times, free it.
	 * The loop stops early if an allocation fails. Always returns NULL.
	 */
	extern void * worker (void * arg)
	{
		int i, j, k; /* Loop control variables */

		struct workerArg * w = (struct workerArg *) arg;
		setCPU(w->_cpu);

		// Give back the object that was allocated by the creating thread.
		free(w->_object);

		uk_pr_debug("Worker thread %d starting to iterate %d times\n", w->_cpu, w->_iterations);
		for (i = 0; i < w->_iterations; i++) {
			// Allocate the object.
			char * obj = (char *)malloc(w->_objSize);
			if (obj == NULL) {
				// malloc(0) may legitimately return NULL; only a non-empty
				// request that comes back NULL is an error.
				if (w->_objSize > 0) {
					fprintf(stderr, "Worker thread %d: malloc(%d) failed\n", w->_cpu, w->_objSize);
					break;
				}
				continue;
			}
			// Write into it a bunch of times.
			for (j = 0; j < w->_repetitions; j++) {
				for (k = 0; k < w->_objSize; k++) {
					obj[k] = (char) k;
					// The volatile read-back keeps the compiler from dropping the access.
					volatile char ch = obj[k];
					ch++;
				}
			}
			// Free the object.
			free(obj);
		}
		uk_pr_debug("Worker thread %d finished iterations\n", w->_cpu);
		free(w);

		return NULL;
	}


	/*
	 * Parse text as a base-10 int that is at least min_value.
	 * Returns 0 and stores the result in *out on success; returns -1 when text
	 * is empty, has trailing characters, is out of int range, or is below
	 * min_value.
	 */
	static int parse_int_arg(const char *text, int min_value, int *out)
	{
		char *end = NULL;
		long value;

		errno = 0;
		value = strtol(text, &end, 10);
		if (end == text || *end != '\0' || errno == ERANGE)
			return -1;
		if (value < min_value || value > INT_MAX)
			return -1;

		*out = (int)value;
		return 0;
	}

	/*
	 * Benchmark driver.
	 *
	 * Usage: <prog> nthreads iterations objSize repetitions
	 *
	 * Allocates one objSize-byte object per thread from the main thread, hands
	 * each object to a worker pinned to CPU (i+1) % numCPU, waits for all
	 * workers, then prints the elapsed wall-clock time and mem_usage().
	 *
	 * Returns 0 on success, 1 on bad arguments, allocation failure or thread
	 * creation failure.
	 */
	int main (int argc, char * argv[]) {
		int nthreads = 0;
		int iterations = 0;
		int objSize = 0;
		int repetitions = 0;
		pthread_attr_t attr;
		pthread_t *threads;
		int numCPU;
		int started = 0;	/* number of threads actually created */
		int status = 0;
		int i;
		char **objs;
		struct timespec start_time = {0};
		struct timespec end_time = {0};

		/* nthreads is later used as a divisor and objSize as an allocation
		 * size, so both must be at least 1. */
		if (argc <= 4
		    || parse_int_arg(argv[1], 1, &nthreads) != 0
		    || parse_int_arg(argv[2], 0, &iterations) != 0
		    || parse_int_arg(argv[3], 1, &objSize) != 0
		    || parse_int_arg(argv[4], 0, &repetitions) != 0) {
			fprintf (stderr, "Usage: %s nthreads iterations objSize repetitions\n",
				 argc > 0 ? argv[0] : "cache-scratch");
			return 1;
		}

		/* numCPU is used as a modulus below; never let it be zero or negative. */
		numCPU = getNumProcessors();
		if (numCPU < 1)
			numCPU = 1;

		/* Heap-allocate the thread handles: a VLA sized by user input could
		 * overflow the stack. */
		threads = calloc((size_t)nthreads, sizeof *threads);
		objs = calloc((size_t)nthreads, sizeof *objs);
		if (threads == NULL || objs == NULL) {
			fprintf(stderr, "Out of memory allocating bookkeeping arrays\n");
			free(threads);
			free(objs);
			return 1;
		}

		// Allocate nthreads objects and distribute them among the threads.
		for (i = 0; i < nthreads; i++) {
			objs[i] = malloc((size_t)objSize);
			if (objs[i] == NULL) {
				fprintf(stderr, "Out of memory allocating object %d\n", i);
				while (i-- > 0)
					free(objs[i]);
				free(objs);
				free(threads);
				return 1;
			}
		}

		initialize_pthread_attr(PTHREAD_CREATE_JOINABLE, SCHED_RR, -10, PTHREAD_EXPLICIT_SCHED,
					PTHREAD_SCOPE_SYSTEM, &attr);

		/* Get the starting time */
		clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);

		for (i = 0; i < nthreads; i++) {
			struct workerArg *w = malloc(sizeof *w);
			int rc;

			if (w == NULL) {
				fprintf(stderr, "Out of memory creating worker %d\n", i);
				status = 1;
				break;
			}
			w->_object = objs[i];
			w->_objSize = objSize;
			w->_repetitions = repetitions / nthreads;
			w->_iterations = iterations;
			w->_cpu = (i+1)%numCPU;

			rc = pthread_create(&threads[i], &attr, &worker, w);
			if (rc != 0) {
				fprintf(stderr, "pthread_create failed for worker %d: %s\n",
					i, strerror(rc));
				free(w);
				status = 1;
				break;
			}
			started++;
			uk_pr_debug("Created worker thread %d\n", i);
		}

		/* Only join threads that were really created. */
		for (i = 0; i < started; i++) {
			uk_pr_debug("Waiting for worker thread %d\n", i);
			pthread_join(threads[i], NULL);
			uk_pr_debug("Worker thread %d finished\n", i);
		}

		/* Get the finish time */
		clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);

		double t = timespec_diff(&start_time, &end_time);

		/* Objects that never reached a worker are still owned by main(). */
		for (i = started; i < nthreads; i++)
			free(objs[i]);
		free(objs);
		free(threads);
		pthread_attr_destroy(&attr);

		printf ("Time elapsed = %f seconds\n", t);
		/* mem_usage() returns ptrdiff_t, whose printf length modifier is 't'. */
		printf ("Memory used = %td bytes\n", mem_usage());
		return status;
	}

	/////////////// Following are helper functions ////////////////////
	/*
	 * Return the time from *start to *end in seconds.
	 *
	 * Both arguments are pointers (the body dereferences them with ->).
	 * When the nanosecond difference is negative, one second is borrowed from
	 * the seconds difference; if there is no whole second to borrow from
	 * (seconds difference is exactly zero) the result is 0.0.
	 */
	double timespec_diff(struct timespec *start, struct timespec *end) {
		struct timespec diff;

		diff.tv_nsec = end->tv_nsec - start->tv_nsec;
		diff.tv_sec = end->tv_sec - start->tv_sec;
		if (diff.tv_nsec < 0) {
			if (diff.tv_sec == 0) {
				return 0.0;
			}
			/* Move 1 second from seconds to nanoseconds */
			diff.tv_sec -= 1;
			diff.tv_nsec += 1000000000L;
		}
		return (double)(diff.tv_sec + (double)diff.tv_nsec/1000000000.0);
	}

	/*
	 * Initialise *attr and configure it for thread creation.
	 *
	 * The detach state and contention scope are always applied. The scheduling
	 * policy and priority are applied only when inheritsched equals
	 * PTHREAD_EXPLICIT_SCHED; inheritsched itself is not stored into *attr.
	 * Return values of the pthread_attr_* calls are not inspected.
	 */
	void initialize_pthread_attr(int detachstate, int schedpolicy, int priority,
				     int inheritsched, int scope, pthread_attr_t *attr)
	{
		const int wants_explicit = (inheritsched == PTHREAD_EXPLICIT_SCHED);

		pthread_attr_init(attr);
		pthread_attr_setdetachstate(attr, detachstate);

		if (wants_explicit) {
			struct sched_param param;

			param.sched_priority = priority;
			/* Policy first, then the priority that goes with it */
			pthread_attr_setschedpolicy(attr, schedpolicy);
			pthread_attr_setschedparam(attr, &param);
		}

		pthread_attr_setscope(attr, scope);
	}

	/*
	 * Return the number of online processors, never less than 1.
	 *
	 * The value comes from sysconf(_SC_NPROCESSORS_ONLN) and is cached after
	 * the first call. sysconf() reports failure as -1; because callers use
	 * the result as a modulus, a failed or non-positive reading is replaced
	 * by 1.
	 *
	 * This function should be more complicated to try and avoid a call to the
	 * C library malloc() routine embedded in the Linux sysconf() call.
	 * However, here we can allow a call to malloc() before the
	 * main test starts.
	 */
	int getNumProcessors (void)
	{
		static int np = 0;

		if (np == 0) {
			long online = sysconf(_SC_NPROCESSORS_ONLN);

			np = (online < 1) ? 1 : (int)online;
		}
		return np;
	}

	/*
	 * Pin the calling thread to CPU n only.
	 * A failing sched_setaffinity() is reported with perror() and otherwise
	 * ignored.
	 */
	void setCPU (int n) {
		cpu_set_t only_n;
		const pid_t self = gettid();

		CPU_ZERO(&only_n);
		CPU_SET(n, &only_n);

		if (sched_setaffinity(self, sizeof(cpu_set_t), &only_n) == 0) {
			return;
		}
		perror("sched_setaffinity failed");
	}


	/*
	 * Set up the private data segment.
	 *
	 * Caches the system page size, obtains DSEG_MAX bytes plus two pages from
	 * malloc(), rounds the start up to a page boundary and marks the segment
	 * as empty (dseg_hi sits one byte below dseg_lo).
	 *
	 * Returns 0 on success, -1 if the allocation fails.
	 */
	int mem_init (void)
	{
		/* Get system page size */
		page_size = (int) getpagesize();

		/* Allocate heap; two extra pages leave room for rounding the start upwards */
		dseg_lo = (char *) malloc(DSEG_MAX + 2*page_size);
		if (!dseg_lo)
			return -1;

		/* align heap to the next page boundary */
		dseg_lo = (char *) PAGE_ALIGN_UP(dseg_lo);
		dseg_hi = dseg_lo-1;
		dseg_size = DSEG_MAX;

		return 0;
	}

	void *mem_sbrk (ptrdiff_t increment)
	{
	char *new_hi = dseg_hi + increment;
	char *old_hi = dseg_hi;
	long dseg_cursize = dseg_hi - dseg_lo + 1;

	UK_ASSERT(increment > 0);

	/* Resize data segment, if the memory is available */
	if (new_hi > dseg_lo + dseg_size)
	return NULL;
	dseg_hi = new_hi;
	dseg_cursize = dseg_hi - dseg_lo + 1;

	return (void *)(old_hi + 1);
	}

	/* Return the page size cached by mem_init(); 0 if mem_init() has not run. */
	int mem_pagesize (void)
	{
	return page_size;
	}

	/*
	 * Return dseg_hi - dseg_lo, the distance between the two segment markers.
	 *
	 * If a segment start is known but the top marker is still NULL, the top
	 * is first taken from the current program break (sbrk(0)).
	 * NOTE(review): mem_sbrk() treats dseg_hi as the last byte in use, so
	 * right after mem_init() this yields -1 rather than 0 - confirm the
	 * intended convention.
	 */
	ptrdiff_t mem_usage (void)
	{
		const int top_unknown = (dseg_hi == NULL);

		/* hack for libc */
		if (top_unknown && dseg_lo != NULL)
			dseg_hi = sbrk(0);

		return dseg_hi - dseg_lo;
	}