cfstras/Makefile

## join.c

// the question i must ask myself is this one:
// has anyone really been far even as decided to use even go want to do look
// more like?
// or is this all just snake oil?

#pragma GCC optimize ("inline-functions")
#pragma GCC optimize ("indirect-inlining")
#pragma GCC optimize ("inline-limit=1024")
#pragma GCC optimize ("Ofast")
#pragma GCC optimize ("loop-interchange")
#pragma GCC optimize ("align-loops")

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <stdint.h>
#include <fcntl.h>
#include <math.h>
#include <string.h>

#define null NULL

#undef VERYDEBUG
#undef READFILE
#undef DEBUG
#undef POSIXREAD

#ifdef PROFILE
    #include <google/profiler.h>
    unsigned insertCollisions = 0, probeCollisions = 0;
#endif

typedef unsigned uint32_t;

typedef struct {
    int size;
    unsigned* el;
    bool* full;
} Bucket;

struct {
    Bucket* buckets;
    int size;
    unsigned mask;
    int bucketSize;
} hashSet;

unsigned inline h1(const unsigned num) {
    //return num & hashSet.mask;
    return (num*2654435761) & (hashSet.mask);
}

unsigned inline h2(const unsigned num, unsigned mask) {
    return num & mask;
    //return (num*2654435761) & (hashSet.mask);
}

void buildTable(int totalNodes) {
    int size = 1;
    while (size < totalNodes) {
        size = size << 1;
    }
    //size = size << 1; // double the space, double the power!
    hashSet.mask = size-1;
    hashSet.size = size;
    hashSet.bucketSize = log2(size);
    hashSet.buckets = (Bucket*)calloc(size, sizeof(Bucket));
}

void insert(const unsigned val) {
    int ha = h1(val);
    Bucket* bucket = hashSet.buckets + ha;
#ifdef VERYDEBUG
    printf("v %d: h1 %d\n", val, i);
#endif
    if (bucket->el == null) {
        bucket->size = hashSet.bucketSize;
        bucket->el = (unsigned*)malloc(hashSet.bucketSize * sizeof(unsigned));
        bucket->full = (bool*)calloc(hashSet.bucketSize, sizeof(bool));
    }
    unsigned mask = bucket->size - 1;
    int i = h2(val, mask);
    int n = 0;
    while(bucket->full[i] && bucket->el[i] != val) {
        #ifdef PROFILE
        insertCollisions++;
        #endif
        i++; n++;
        i = (i & mask);
        if (n >= bucket->size) {
            // resize
            if (hashSet.bucketSize <= bucket->size) {
                hashSet.bucketSize = hashSet.bucketSize << 1;
            }
            bucket->el = (unsigned*)realloc(bucket->el, hashSet.bucketSize * sizeof(unsigned));
            bucket->full = (bool*)realloc(bucket->full, hashSet.bucketSize * sizeof(bool));
            memset(bucket->full + bucket->size, 0,
                (hashSet.bucketSize - bucket->size)*sizeof(bool));
            bucket->size = hashSet.bucketSize;
            mask = bucket->size - 1;
        }
    }
    bucket->el[i] = val;
    bucket->full[i] = true;
}

int exists(const unsigned val) {
    int ha = h1(val);
    Bucket* bucket = hashSet.buckets + ha;
    unsigned mask = bucket->size - 1;
    int i = h2(val, mask);
    int n = 0;
    if (bucket->el == NULL) return 0;
    while(bucket->full[i] && bucket->el[i] != val) {
        i++; n++;
        i = (i & mask);
        if (n >= bucket->size) {
            return 0;
        }
    }
    return val == bucket->el[i] && bucket->full[i];
}

struct {
    char* buf;
    int   len;
    char* current;
    char* end;
#ifdef POSIXREAD
    int   fd;
#else
    FILE* fd;
#endif
} buffer __attribute__ ((aligned (16)));


void initBuffer() {
    buffer.buf = (char*)malloc(sizeof(char) * buffer.len);
    buffer.current = buffer.end = buffer.buf;
}

int replenish() {
#ifdef POSIXREAD
    int nl = read(buffer.fd,buffer.buf,buffer.len*sizeof(char));
#else
    int nl = fread(buffer.buf, sizeof(char), buffer.len, buffer.fd);
#endif
    buffer.end = buffer.buf + nl;
    buffer.current = buffer.buf;
    return nl > 0;
}

unsigned readInt() {
    unsigned num = 0;
    while (true) {
        while(buffer.current != buffer.end) {
            char c = *buffer.current;
            if(c >= '0' && c <= '9') {
                num = num * 10 + (c - '0');
            } else {
#ifdef VERYDEBUG
                printf("num %d\n", num);
#endif
                return num;
            }
            buffer.current++;
        }
        if (!replenish()) {
#ifdef VERYDEBUG
            printf("num %d, eof\n", num);
#endif
            return num;
        }
    }
}

void readSpace() {
    while(true) {
        for (; buffer.current != buffer.end; buffer.current++) {
            char c = *buffer.current;
            if ( c != ' ' && c != '\n') {
                return;
            }
        }
        if (!replenish()) {
            return;
        }
    }
}

void readInts(unsigned* buf, int len, int offset) {
    for (int i=offset; i<len; i++) {
        buf[i] = readInt();
        readSpace();
    }
}

int offset = 0;
unsigned result = 0;
unsigned *buildData = NULL, *probeData = NULL;
unsigned buildCount, probeCount;

void probePhase(int len, int offset) {
    for (int i = offset; i < len; i++) {
        if (exists(probeData[i])) {
            ++result;
        }
    }
}

void buildPhase(int len, int offset) {
    for (int i = offset; i < len; i++) {
        insert(buildData[i]);
    }
}

int main() {

#ifdef READFILE
    #ifdef POSIXREAD
        buffer.fd = open("big.txt", 0);
    #else
        buffer.fd = fopen("big.txt", "r");
    #endif
#else
    #ifdef POSIXREAD
        buffer.fd = 0;
    #else
        buffer.fd = stdin;
    #endif
#endif
#ifdef DEBUG
    printf("build...\n");
#endif
#ifdef PROFILE
    ProfilerStart("build.prof");

    struct timespec start, stop;
    double accum;

    if( clock_gettime( CLOCK_REALTIME, &start) == -1 ) {
      printf( "clock gettime" );
      return 1;
    }
#endif

    buffer.len = 1<<16;
    initBuffer();

    int lcount, rcount;
    unsigned *lbuf, *rbuf;

    buildCount = lcount = readInt(); readSpace();
    buildData = lbuf = (unsigned*)malloc(sizeof(unsigned)*(lcount));
    readInts(lbuf, lcount, 0); // read l

    probeCount = rcount = readInt(); readSpace();
    probeData = rbuf = (unsigned*)malloc(sizeof(unsigned)*(rcount));

    if (buildCount > probeCount) { //swap
        int tmp = buildCount;
        buildCount = probeCount;
        probeCount = tmp;
        unsigned* tmp2 = probeData;
        probeData = buildData;
        buildData = tmp2;
    }
    buildTable(buildCount);

    readInts(rbuf, rcount, 0); // read second

    buildPhase(buildCount, 0);
    probePhase(probeCount, 0);

    // Output result
    printf("%d\n",result);

    #ifdef POSIXREAD
        close(buffer.fd);
    #else
        fclose(buffer.fd);
    #endif

#ifdef PROFILE
    if( clock_gettime( CLOCK_REALTIME, &stop) == -1 ) {
      perror( "clock gettime" );
      return 1;
    }

    accum = ( stop.tv_sec - start.tv_sec )
             + (double)( stop.tv_nsec - start.tv_nsec )
               / (double)100000;
    printf("TIME: %lf *100 us\n", accum);
    printf("Collisions: insert %d, probe %d, buildCount: %d, probeCount: %d, hashtable size: %d\n",
        insertCollisions, probeCollisions, buildCount, probeCount, hashSet.size);

    ProfilerStop();
#endif
    return 0;
}

## Makefile
CXX := clang++
CXXFLAGS := -std=c++0x -xc++ -march=native -O3 -Wall -g
DPROFILE := -DPROFILE
CXXVERBOSE := -ftree-vectorizer-verbose=0
LIBS := -lprofiler

TIME := time
export CPUPROFILE=join.prof
export CPUPROFILE_FREQUENCY=10000000000
export CPUPROFILE_REALTIME=0

all: clean build

.PHONY: build
build:
	${CXX} ${CXXFLAGS} ${CXXVERBOSE} ${DPROFILE} join.c -o join ${LIBS}

.PHONY: buildraw
buildraw:
	${CXX} ${CXXFLAGS} join.c -o join ${LIBS}

run: clean build start
runbig: clean build startbig

start:
	${TIME} ${PROFILE} ./join < small.txt

startbig:
	${TIME} ${PROFILE} ./join < big.txt

profile: runbig pprof

t: build run_time

run_time:
	bash time.sh

pprof:
	google-pprof --web ./join join.prof

clean:
	rm -f join

operf: clean buildraw operf_do

operf_do:
	operf ./join < big.txt
	opannotate --source --assembly ./join > annotated/join.sc 2>/dev/null

## small.txt
10
44
47
64
67
9
83
21
36
87
70
5
47
87
81
77
9

## time.sh
#!/bin/bash

x=1
num=0
total=0
while [ $x -le 100 ]
do
	took=$(./join < big.txt 2>&1 | grep TIME | cut -c7-15)
	if [[ $(echo $took | cut -c1) != "-" && $x > 15 ]]; then
		num=$(( $num + 1 ))
		total=$(echo "$took + $total" | bc)
	fi

x=$(( $x + 1 ))
done

avg=$(echo "$total / $num.0" | bc)
echo "Average: $avg, Total: $total"

	// the question i must ask myself is this one:
	// has anyone really been far even as decided to use even go want to do look
	// more like?
	// or is this all just snake oil?

	#pragma GCC optimize ("inline-functions")
	#pragma GCC optimize ("indirect-inlining")
	#pragma GCC optimize ("inline-limit=1024")
	#pragma GCC optimize ("Ofast")
	#pragma GCC optimize ("loop-interchange")
	#pragma GCC optimize ("align-loops")

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <time.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <math.h>
	#include <string.h>

	#define null NULL

	#undef VERYDEBUG
	#undef READFILE
	#undef DEBUG
	#undef POSIXREAD

	#ifdef PROFILE
	#include <google/profiler.h>
	unsigned insertCollisions = 0, probeCollisions = 0;
	#endif

	typedef unsigned uint32_t;

	typedef struct {
	int size;
	unsigned* el;
	bool* full;
	} Bucket;

	struct {
	Bucket* buckets;
	int size;
	unsigned mask;
	int bucketSize;
	} hashSet;

	unsigned inline h1(const unsigned num) {
	//return num & hashSet.mask;
	return (num*2654435761) & (hashSet.mask);
	}

	unsigned inline h2(const unsigned num, unsigned mask) {
	return num & mask;
	//return (num*2654435761) & (hashSet.mask);
	}

	void buildTable(int totalNodes) {
	int size = 1;
	while (size < totalNodes) {
	size = size << 1;
	}
	//size = size << 1; // double the space, double the power!
	hashSet.mask = size-1;
	hashSet.size = size;
	hashSet.bucketSize = log2(size);
	hashSet.buckets = (Bucket*)calloc(size, sizeof(Bucket));
	}

	void insert(const unsigned val) {
	int ha = h1(val);
	Bucket* bucket = hashSet.buckets + ha;
	#ifdef VERYDEBUG
	printf("v %d: h1 %d\n", val, i);
	#endif
	if (bucket->el == null) {
	bucket->size = hashSet.bucketSize;
	bucket->el = (unsigned)malloc(hashSet.bucketSize sizeof(unsigned));
	bucket->full = (bool*)calloc(hashSet.bucketSize, sizeof(bool));
	}
	unsigned mask = bucket->size - 1;
	int i = h2(val, mask);
	int n = 0;
	while(bucket->full[i] && bucket->el[i] != val) {
	#ifdef PROFILE
	insertCollisions++;
	#endif
	i++; n++;
	i = (i & mask);
	if (n >= bucket->size) {
	// resize
	if (hashSet.bucketSize <= bucket->size) {
	hashSet.bucketSize = hashSet.bucketSize << 1;
	}
	bucket->el = (unsigned)realloc(bucket->el, hashSet.bucketSize sizeof(unsigned));
	bucket->full = (bool)realloc(bucket->full, hashSet.bucketSize sizeof(bool));
	memset(bucket->full + bucket->size, 0,
	(hashSet.bucketSize - bucket->size)*sizeof(bool));
	bucket->size = hashSet.bucketSize;
	mask = bucket->size - 1;
	}
	}
	bucket->el[i] = val;
	bucket->full[i] = true;
	}

	int exists(const unsigned val) {
	int ha = h1(val);
	Bucket* bucket = hashSet.buckets + ha;
	unsigned mask = bucket->size - 1;
	int i = h2(val, mask);
	int n = 0;
	if (bucket->el == NULL) return 0;
	while(bucket->full[i] && bucket->el[i] != val) {
	i++; n++;
	i = (i & mask);
	if (n >= bucket->size) {
	return 0;
	}
	}
	return val == bucket->el[i] && bucket->full[i];
	}

	struct {
	char* buf;
	int len;
	char* current;
	char* end;
	#ifdef POSIXREAD
	int fd;
	#else
	FILE* fd;
	#endif
	} buffer __attribute__ ((aligned (16)));


	void initBuffer() {
	buffer.buf = (char)malloc(sizeof(char) buffer.len);
	buffer.current = buffer.end = buffer.buf;
	}

	int replenish() {
	#ifdef POSIXREAD
	int nl = read(buffer.fd,buffer.buf,buffer.len*sizeof(char));
	#else
	int nl = fread(buffer.buf, sizeof(char), buffer.len, buffer.fd);
	#endif
	buffer.end = buffer.buf + nl;
	buffer.current = buffer.buf;
	return nl > 0;
	}

	unsigned readInt() {
	unsigned num = 0;
	while (true) {
	while(buffer.current != buffer.end) {
	char c = *buffer.current;
	if(c >= '0' && c <= '9') {
	num = num * 10 + (c - '0');
	} else {
	#ifdef VERYDEBUG
	printf("num %d\n", num);
	#endif
	return num;
	}
	buffer.current++;
	}
	if (!replenish()) {
	#ifdef VERYDEBUG
	printf("num %d, eof\n", num);
	#endif
	return num;
	}
	}
	}

	void readSpace() {
	while(true) {
	for (; buffer.current != buffer.end; buffer.current++) {
	char c = *buffer.current;
	if ( c != ' ' && c != '\n') {
	return;
	}
	}
	if (!replenish()) {
	return;
	}
	}
	}

	void readInts(unsigned* buf, int len, int offset) {
	for (int i=offset; i<len; i++) {
	buf[i] = readInt();
	readSpace();
	}
	}

	int offset = 0;
	unsigned result = 0;
	unsigned buildData = NULL, probeData = NULL;
	unsigned buildCount, probeCount;

	void probePhase(int len, int offset) {
	for (int i = offset; i < len; i++) {
	if (exists(probeData[i])) {
	++result;
	}
	}
	}

	void buildPhase(int len, int offset) {
	for (int i = offset; i < len; i++) {
	insert(buildData[i]);
	}
	}

	int main() {

	#ifdef READFILE
	#ifdef POSIXREAD
	buffer.fd = open("big.txt", 0);
	#else
	buffer.fd = fopen("big.txt", "r");
	#endif
	#else
	#ifdef POSIXREAD
	buffer.fd = 0;
	#else
	buffer.fd = stdin;
	#endif
	#endif
	#ifdef DEBUG
	printf("build...\n");
	#endif
	#ifdef PROFILE
	ProfilerStart("build.prof");

	struct timespec start, stop;
	double accum;

	if( clock_gettime( CLOCK_REALTIME, &start) == -1 ) {
	printf( "clock gettime" );
	return 1;
	}
	#endif

	buffer.len = 1<<16;
	initBuffer();

	int lcount, rcount;
	unsigned lbuf, rbuf;

	buildCount = lcount = readInt(); readSpace();
	buildData = lbuf = (unsigned)malloc(sizeof(unsigned)(lcount));
	readInts(lbuf, lcount, 0); // read l

	probeCount = rcount = readInt(); readSpace();
	probeData = rbuf = (unsigned)malloc(sizeof(unsigned)(rcount));

	if (buildCount > probeCount) { //swap
	int tmp = buildCount;
	buildCount = probeCount;
	probeCount = tmp;
	unsigned* tmp2 = probeData;
	probeData = buildData;
	buildData = tmp2;
	}
	buildTable(buildCount);

	readInts(rbuf, rcount, 0); // read second

	buildPhase(buildCount, 0);
	probePhase(probeCount, 0);

	// Output result
	printf("%d\n",result);

	#ifdef POSIXREAD
	close(buffer.fd);
	#else
	fclose(buffer.fd);
	#endif

	#ifdef PROFILE
	if( clock_gettime( CLOCK_REALTIME, &stop) == -1 ) {
	perror( "clock gettime" );
	return 1;
	}

	accum = ( stop.tv_sec - start.tv_sec )
	+ (double)( stop.tv_nsec - start.tv_nsec )
	/ (double)100000;
	printf("TIME: %lf *100 us\n", accum);
	printf("Collisions: insert %d, probe %d, buildCount: %d, probeCount: %d, hashtable size: %d\n",
	insertCollisions, probeCollisions, buildCount, probeCount, hashSet.size);

	ProfilerStop();
	#endif
	return 0;
	}
	CXX := clang++
	CXXFLAGS := -std=c++0x -xc++ -march=native -O3 -Wall -g
	DPROFILE := -DPROFILE
	CXXVERBOSE := -ftree-vectorizer-verbose=0
	LIBS := -lprofiler

	TIME := time
	export CPUPROFILE=join.prof
	export CPUPROFILE_FREQUENCY=10000000000
	export CPUPROFILE_REALTIME=0

	all: clean build

	.PHONY: build
	build:
	${CXX} ${CXXFLAGS} ${CXXVERBOSE} ${DPROFILE} join.c -o join ${LIBS}

	.PHONY: buildraw
	buildraw:
	${CXX} ${CXXFLAGS} join.c -o join ${LIBS}

	run: clean build start
	runbig: clean build startbig

	start:
	${TIME} ${PROFILE} ./join < small.txt

	startbig:
	${TIME} ${PROFILE} ./join < big.txt

	profile: runbig pprof

	t: build run_time

	run_time:
	bash time.sh

	pprof:
	google-pprof --web ./join join.prof

	clean:
	rm -f join

	operf: clean buildraw operf_do

	operf_do:
	operf ./join < big.txt
	opannotate --source --assembly ./join > annotated/join.sc 2>/dev/null
	#!/bin/bash

	x=1
	num=0
	total=0
	while [ $x -le 100 ]
	do
	took=$(./join < big.txt 2>&1 \| grep TIME \| cut -c7-15)
	if [[ $(echo $took \| cut -c1) != "-" && $x > 15 ]]; then
	num=$(( $num + 1 ))
	total=$(echo "$took + $total" \| bc)
	fi

	x=$(( $x + 1 ))
	done

	avg=$(echo "$total / $num.0" \| bc)
	echo "Average: $avg, Total: $total"