Skip to content

Instantly share code, notes, and snippets.

Created February 2, 2013 05:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anonymous/4696220 to your computer and use it in GitHub Desktop.
Save anonymous/4696220 to your computer and use it in GitHub Desktop.
My failed attempt at "Detect duplicates in a random list of 10,000,000 64bit ints in under a second" also trying to remember how to write c I'm not getting enough overlap of the first 32 bits, so the random numbers are suspect.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <stdint.h>
#define MAX_CANDIDATES 25000
#define TWOTOTHE32 4294967296
int64_t random64bit(){
// we need 64 bits of random data
int64_t r = 0;
r += rand();
r <<= 31;
r += rand();
r <<= 31;
r += rand();
return r;
}
int64_t *rand_array(long length){
int64_t *a;
a = malloc(sizeof(*a) * length);
for (long i = 0; i<length; i++){
a[i] = random64bit();
}
return a;
}
int64_t *rand_array_with_dup(long length){
int64_t *a = rand_array(length);
a[1234] = a[56789];
return a;
}
int compare_function(const int64_t *a, const int64_t *b){
if (*a < *b){
return -1;
}
if (*a == *b){
printf("duplicate found: it's %lld!\n", *a);
return 0;
}
return 1;
}
int main(){
srand(time(0));
int length = 10000000;
//int64_t *a = rand_array(length);
int64_t *a = rand_array_with_dup(length);
printf("Done generating random array of 64-bit ints of length %d\n", length);
time_t now;
time(&now);
printf("%s", ctime(&now));
uint32_t *halves = (uint32_t*) a;
char *seen_halves = calloc(TWOTOTHE32, sizeof(char));
printf("first pass\n"); fflush(stdout);
long doublelength = length * 2;
for (long i = 0; i<doublelength; i+=2){
seen_halves[halves[i]] += 1;
}
int64_t candidates[MAX_CANDIDATES];
int candidate_index = 0;
printf("second pass\n"); fflush(stdout);
for (long i = 0; i<doublelength; i+=2){
if (seen_halves[halves[i]] > 1){
candidates[candidate_index++] = a[i/2];
}
}
printf("found %d possible duplicates, sorting now\n", candidate_index); fflush(stdout);
qsort(candidates, candidate_index, sizeof(int64_t), compare_function);
time(&now);
printf("%s", ctime(&now)); fflush(stdout);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment