Skip to content

Instantly share code, notes, and snippets.

@JakubMifek
Created December 30, 2017 21:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JakubMifek/7e9f779dea90bf77ee51fb0855a4dd7c to your computer and use it in GitHub Desktop.
Save JakubMifek/7e9f779dea90bf77ee51fb0855a4dd7c to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
///////////////////////////////////////////////////////////////////////////////////////
/// ///
/// Author: Jakub Mifek ///
/// Assignment for Data Structures I course on Charles University ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////
/// ///
/// This code was copied from Xenon post found on ///
/// https://stackoverflow.com/questions/822323/how-to-generate-a-random-number-in-c ///
/// ///
/// Note that this code will only work on UNIX based computers that have ///
/// /dev/urandom device available. ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////
int urandom_fd = -2;
void urandom_init()
{
urandom_fd = open("/dev/urandom", O_RDONLY);
if (urandom_fd == -1)
{
int errsv = urandom_fd;
printf("Error opening [/dev/urandom]: %i\n", errsv);
exit(1);
}
}
unsigned long int urandom()
{
unsigned long int buf_impl;
unsigned long int *buf = &buf_impl;
if (urandom_fd == -2)
{
urandom_init();
}
/* Read 4 bytes, or 32 bits into *buf, which points to buf_impl */
read(urandom_fd, buf, sizeof(unsigned long int));
return buf_impl;
}
///////////////////////////////////////////////////////////////////////////////////////
/// ///
/// End of copied code ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////
const unsigned long int U = ULONG_MAX;
const unsigned int u = 64;
// Max value on my test-computer: 29
unsigned int k = 20; // default value
// want to try out values 4, 8 and 16
unsigned int c = 8; // default value
unsigned long int U_bound;
unsigned long int m;
unsigned int p, q;
unsigned int t;
double steps;
int deb, flag, time, seq;
int F, R;
unsigned long int *hash_table;
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
///////////////////////////////////////////////////////////////////////////////////////
/// ///
/// Hash functions ///
/// ///
/// All hash funcions return value from the set [0; m]. ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////
unsigned long int **
init_table()
{
int i;
p = u / c;
q = p * (c - 1);
t = 1U << p;
// Init T
unsigned long int **T = (unsigned long int **) malloc(sizeof(unsigned long int *) * c);
for (i = 0; i < c; i++)
T[i] = (unsigned long int *) malloc(sizeof(unsigned long int) * t);
// init random values in Ts - note that the numbers contained in T_1 - T_c
// are not completely random. That is due to use of the '% m'. To be completely
// random 'm' must be a factor of '2^u'.
unsigned int j;
for (i = 0; i < c; i++)
for (j = 0; j < t; j++)
T[i][j] = urandom() % m;
return T;
}
unsigned long int
table_hash(unsigned long int **T, unsigned long int x)
{
int i;
unsigned long int result = T[0][ (unsigned int)(( (x << q) % U ) >> q) ];
for (i = 1; i < c; i++)
result ^= T[i][ (unsigned int)((x << ( (p * (c - i - 1)) % U )) >> q) ];
return result;
}
unsigned long int
init_multiplicative()
{
unsigned long int a = urandom() % U;
a = a - (a % 2) + 1;
if(deb)
printf("a: %lu\n", a);
return a;
}
unsigned long int
multiplicative_hash(unsigned long int a, unsigned long int x)
{
printf("modulo modulo %lu %lu\n", a, x);
return ( (a * x) >> (u - k) );
}
unsigned long int
modulo_hash(unsigned long int x)
{
return x % m;
}
///////////////////////////////////////////////////////////////////////////////////////
/// ///
/// Addition functions ///
/// ///
/// Addition functions expect given value to be from the set [1; U] ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////
unsigned int actual_i = 0;
void
linear_place(unsigned long int hash, unsigned long int value)
{
unsigned long int stop = MIN(hash - 1, m - 1);
struct timeval t1, t2;
double elapsedTime;
if(time)
{
gettimeofday(&t1, NULL);
}
do
{
if(!time && actual_i > 0)
{
steps++;
}
if (!hash_table[hash])
{
hash_table[hash] = value;
value = 0;
hash = stop - 1;
}
}
while ((hash = (hash + 1) % m) != stop);
if(time && actual_i > 0)
{
// Stop timer
gettimeofday(&t2, NULL);
// Compute and print the elapsed time in millisec
elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; // sec to ms
elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; // us to ms
steps += elapsedTime;
}
if(!time && actual_i > 0)
{
steps--;
}
if (value)
printf("Value %lu could not be added to the hash table under hash %lu.\nTable full.\n", value, hash);
}
void
linear(int max_steps, int perc)
{
int i;
// Init variables
unsigned long int x = 0;
unsigned long int P = (unsigned long int)((m + 9) / 10);
unsigned long int M = (unsigned long int)((m / 100.0) * perc);
printf("Will compute only before %d%% of the hash_table is full.\n", perc);
printf("Output values will be means of %ds of computed values.\n", max_steps);
struct timeval t1, t2;
double elapsedTime;
unsigned long int a = 0UL;
unsigned long int **T = NULL;
// Init hashing
switch (flag)
{
case 1:
if (deb)
printf("Modulo hash init.\n");
break;
case 2:
if (deb)
printf("Multiplicative hash init.\n");
a = init_multiplicative();
break;
case 4:
if (deb)
printf("Table hash init\n");
T = init_table();
break;
default:
printf("Unexpected hash flag for linear add.\n");
exit(1);
break;
}
// Start timer
gettimeofday(&t1, NULL);
// Start hashing
for (i = 0; i < M; i++)
{
if(i * 10.0 / P >= F && i * 10.0 / P <= R)
{
actual_i++;
} else {
if(actual_i > 0)
{
printf("steps: %f\n", steps / (float)actual_i);
steps = 0;
actual_i = 0;
}
if (deb)
printf("%u%% full.\n", (unsigned int)((i * 10.0) / P));
}
if (actual_i % max_steps == 0)
{
if(actual_i > 0)
{
printf("steps: %f\n", steps / (float)actual_i);
steps = 0;
actual_i = 0;
}
if (deb)
printf("%u%% full.\n", (unsigned int)((i * 10.0) / P));
}
if(seq)
x = (urandom() % (U_bound - 1)) + 1; // x is from [1; U]
else
x++;
if (a != 0)
{
linear_place(multiplicative_hash(a, x), x);
}
else if (T != NULL)
{
linear_place(table_hash(T, x), x);
}
else
{
linear_place(modulo_hash(x), x);
}
}
if (actual_i > 0)
{
printf("steps: %f\n", steps / (float)actual_i);
if (deb)
printf("%u%% full.\n", perc);
steps = 0;
}
// Stop timer
gettimeofday(&t2, NULL);
// Compute and print the elapsed time in millisec
elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; // sec to ms
elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; // us to ms
printf("Computed in: %fms\n", elapsedTime);
// Free memory if necessary
if (T != NULL)
{
for (i = 0; i < c; i++)
free(T[i]);
free(T);
}
}
unsigned long int current_steps = 0UL;
unsigned long int a_1 = 0UL;
unsigned long int a_2 = 0UL;
unsigned long int **T_1 = NULL;
unsigned long int **T_2 = NULL;
unsigned long int half = 0UL;
unsigned int timeout = 0UL;
unsigned long int unassigned;
void
init_functions()
{
// Init hashing
switch (flag)
{
case 1:
if (deb)
printf("Modulo hash init.\n");
break;
case 2:
if (deb)
printf("Multiplicative hash init.\n");
a_1 = init_multiplicative();
a_2 = init_multiplicative();
break;
case 3:
if (deb)
printf("Modulo & multiplicative hash init.\n");
a_2 = init_multiplicative();
case 4:
if (deb)
printf("Table hash init\n");
T_1 = init_table();
T_2 = init_table();
break;
case 5:
if (deb)
printf("Modulo & table hash init.\n");
T_2 = init_table();
break;
case 6:
if (deb)
printf("Multiplicative & table hash init.\n");
a_1 = init_multiplicative();
T_2 = init_table();
break;
default:
printf("Unexpected hash flag for linear add.\n");
exit(1);
break;
}
if (deb)
printf("Functions init end.\n");
}
int
cuckoo_swap(unsigned long int hash, unsigned long int value)
{
current_steps++;
if (!hash_table[hash])
{
hash_table[hash] = value;
if (deb)
printf("Value placed.\n");
return 0;
}
if (current_steps > timeout)
{
if (deb)
printf("Timeout: Too many steps. Rebuilding.\n");
unassigned = value;
return 1;
}
if (deb)
printf("Swaping value %lu with value %lu on position %lu\n", value, hash_table[hash], hash);
unsigned long int temp = value;
value = hash_table[hash];
hash_table[hash] = temp;
if (hash >= half)
{
if (a_1)
hash = multiplicative_hash(a_1, value);
else if (T_1)
hash = table_hash(T_1, value);
else
hash = modulo_hash(value);
hash = hash % half;
}
else
{
if (a_2)
hash = multiplicative_hash(a_2, value);
else if (T_2)
hash = table_hash(T_2, value);
else
hash = modulo_hash(value);
hash = (hash % half) + half;
}
if (deb)
printf("Placing value %lu under hash %lu\n", value, hash);
return cuckoo_swap(hash, value);
}
int
cuckoo_place(unsigned long int value)
{
if (deb)
printf("Placing value %lu ", value);
unsigned long int hash = 0UL;
if (a_1)
hash = multiplicative_hash(a_1, value);
else if (T_1)
hash = table_hash(T_1, value);
else
hash = modulo_hash(value);
hash = hash % half;
if (deb)
printf("under hash %lu\n", hash);
return cuckoo_swap(hash, value);
}
int
rehash()
{
if (deb)
printf("Rehashing.\n");
init_functions();
// Move current table
int i;
unsigned long int *table = hash_table;
hash_table = (unsigned long int *) malloc(sizeof(unsigned long int) * m);
for (i = 0; i < m; i++)
hash_table[i] = 0UL;
int ret;
unsigned long int temp = unassigned;
for (i = 0; i < m; i++)
{
if (table[i])
{
ret = cuckoo_place(table[i]);
if(!time && actual_i > 0)
{
steps += current_steps;
}
current_steps = 0UL;
if (ret)
{
if (deb)
printf("Rehashing failed.\n");
free(hash_table);
hash_table = table;
unassigned = temp;
return 1;
}
}
}
// Free old table
free(table);
return 0;
}
unsigned long int generated = 0UL;
unsigned long int placed = 0UL;
void
cuckoo(int max_steps, int perc)
{
// Init variables
unsigned long int x = 0;
unsigned long int P = (unsigned long int)((m + 9) / 10);
unsigned long int M = (unsigned long int)((m / 100.0) * perc);
printf("Will compute only before %d%% of the hash_table is full.\n", perc);
printf("Output values will be means of %ds of computed values.\n", max_steps);
struct timeval t1, t2, t3, t4;
double elapsedTime;
half = m >> 1;
timeout = 6 * k;
printf("half: %lu\n", half);
printf("timeout: %u\n", timeout);
// Init hashing
rehash(flag);
// Start hashing
int ret, reh, i;
for (i = 0; i < M; i++)
{
if(i * 10.0 / P >= F && i * 10.0 / P <= R)
{
actual_i++;
} else {
if(actual_i > 0)
{
printf("steps: %f\n", steps / (float)actual_i);
steps = 0;
actual_i = 0;
}
if (deb)
printf("%u%% full.\n", (unsigned int)((i * 10.0) / P));
}
if (actual_i % max_steps == 0)
{
if(actual_i > 0)
{
printf("steps: %f\n", steps / (float)actual_i);
steps = 0;
actual_i = 0;
}
if (deb)
printf("%u%% full.\n", (unsigned int)((i * 10.0) / P));
}
if(!seq)
x = (urandom() % (U_bound - 1)) + 1; // x is from [1; U]
else
x++;
if(time)
{
// Start timer
gettimeofday(&t3, NULL);
}
if (deb)
{
generated++;
printf("Generated value: %lu\n", x);
}
do
{
ret = cuckoo_place(x);
if(!time && actual_i > 0)
{
steps += current_steps;
}
current_steps = 0UL;
if (ret)
{
do
{
reh = rehash();
}
while (reh); // while the table could not be rehashed, repeat
x = unassigned;
}
}
while (ret); // while the value could not be palced, repeat
if(time && actual_i > 0)
{
// Stop timer
gettimeofday(&t4, NULL);
// Compute and print the elapsed time in millisec
elapsedTime = (t4.tv_sec - t3.tv_sec) * 1000.0; // sec to ms
elapsedTime += (t4.tv_usec - t3.tv_usec) / 1000.0; // us to ms
steps += elapsedTime;
}
}
if (actual_i > 0)
{
printf("steps: %f\n", steps / (float)actual_i);
if (deb)
printf("%u%% full.\n", perc);
steps = 0;
}
printf("Computed in: %fms\n", elapsedTime);
if (deb)
{
printf("Generated values: %lu\n", generated);
for (i = 0; i < m; i++)
if (hash_table[i])
placed++;
printf("Placed: %lu\n", placed);
}
// Free memory if necessary
if (T_1)
{
for (i = 0; i < c; i++)
free(T_1[i]);
free(T_1);
}
if (T_2)
{
for (i = 0; i < c; i++)
free(T_2[i]);
free(T_2);
}
}
///////////////////////////////////////////////////////////////////////////////////////
/// ///
/// Main function ///
/// ///
/// This program uses unsigned long int numbers. -> Maximum exponent value is 64. ///
/// ///
/// Arguments: ///
/// -m Modulo hashing ///
/// -d Multiplication hashing ///
/// -t Table hashing ///
/// -l Linear addition ///
/// -u Cuckoo addition ///
/// -D Debug output ///
/// -S Sequential generation ///
/// -T Measuring time instead of steps ///
/// -F Floor bound of measuring; fillment in % ///
/// -R Roof bound of measuring; fillment in % ///
/// -k <num> Table size exponent ///
/// -c <num> Number of partions of number used for table hashing ///
/// -p <num> Maximum percentage of hash_table to be used ///
/// -s <num> Number of computations used for average steps ///
/// -U <num> Exponent of the upper bound of the Universum ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char ** argv)
{
// Init variables
unsigned long int i;
opterr = 0;
void (*fun)(int, int) = NULL;
int perc = 100;
int max_steps = 100;
deb = 0;
flag = 0;
U_bound = ULONG_MAX;
time = 0;
seq = 0;
F = 0;
R = 100;
// Parse arguments
while ((i = getopt (argc, argv, "mdtluDTSk:c:p:s:U:F:R:")) != -1)
{
switch (i)
{
case 'k':
k = atoi(optarg);
break;
case 'c':
c = atoi(optarg);
break;
case 'm':
flag += 1;
break;
case 'd':
flag += 2;
break;
case 't':
flag += 4;
break;
case 'l':
fun = &linear;
break;
case 'u':
fun = &cuckoo;
break;
case 'p':
perc = atoi(optarg);
break;
case 's':
max_steps = atoi(optarg);
break;
case 'D':
deb = 1;
break;
case 'U':
U_bound = 1UL << atoi(optarg);
break;
case 'T':
time = 1;
printf("Measuring time.\n");
break;
case 'S':
seq = 1;
break;
case 'F':
F = atoi(optarg);
break;
case 'R':
R = atoi(optarg);
break;
}
}
if (!flag)
{
printf("You must specify hash function by using -m / -d / -t.\n");
return 1;
}
if (!fun)
{
printf("You must specify add function by using -l / -u.\n");
return 1;
}
if (perc < 0 || perc > 100)
{
printf("p must be a value between 0 and 100.\n");
return 1;
}
if (k < 0 || k > 64)
{
printf("k must be a value between 0 and 64.\n");
return 1;
}
if(F < 0 || R < F || F > R || R > 100)
{
printf("F and R has following rules: 0 <= F < R <= 100; F,R are integers.\n");
return 1;
}
printf("u: %u\n", u);
printf("k: %u\n", k);
printf("c: %u\n", c);
m = 1UL << k; // each table has 2^k space
printf("M: %lu\n", m);
printf("U: %lu\n", U_bound);
printf("P: %u\n", perc);
printf("O: %u\n", max_steps);
// Init hash_table
hash_table = (unsigned long int *) malloc(sizeof(unsigned long int) * m);
for (i = 0; i < m; i++)
hash_table[i] = 0;
// Run the hashing
fun(max_steps, perc);
// Free all allocated memory
free(hash_table);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment