Skip to content

Instantly share code, notes, and snippets.

@rafaelbrandao
Created September 25, 2012 13:58
Show Gist options
  • Save rafaelbrandao/3782077 to your computer and use it in GitHub Desktop.
Save rafaelbrandao/3782077 to your computer and use it in GitHub Desktop.
Program that prints the entries of a file list that are not in a given subset.
#include <cstdio>
#include <cstring>
#include <set>
using namespace std;
typedef unsigned long long uint64;
// Multiplier of the polynomial string hash computed by hash() below.
const uint64 hash_base = 263;

// Polynomial hash of the NUL-terminated string s.
//
// For every byte b of s (in order): h = h * hash_base + (b + 1), using
// unsigned 64-bit arithmetic, which wraps around on overflow.
// Returns 0 for the empty string.
//
// Each byte is read as unsigned char so it always contributes a value in
// 1..256. Reading it as plain char (signed on many platforms) would make the
// byte 0xFF contribute 0, so e.g. "\xFFa" and "a" would hash identically.
uint64 hash(const char* s) {
  uint64 h = 0;
  // Walk to the terminator directly instead of a separate strlen() pass.
  for (const unsigned char* p = reinterpret_cast<const unsigned char*>(s); *p != 0; ++p)
    h = h * hash_base + (static_cast<uint64>(*p) + 1);
  return h;
}
// This is useful if you have a subset of tests that are failing and the set of tests you've run originally.
// The output of this file is the complement of that subset: the list of tests that are passing.
int main(int argc, char* argv[]) {
if (argc != 3) {
printf("Usage: ./files-list-diff-subset-set {subset-file} {set-file}\n");
return 0;
}
FILE* in;
set<uint64> hashes;
char line[2000000];
in = fopen(argv[1], "r");
while (fscanf(in, "%s", line) == 1)
hashes.insert(hash(line));
fclose(in);
in = fopen(argv[2], "r");
while (fscanf(in, "%s", line) == 1) {
if (!hashes.count(hash(line)))
printf("%s\n", line);
}
fclose(in);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment