Skip to content

Instantly share code, notes, and snippets.

@kazhang
Created March 7, 2013 17:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kazhang/5109883 to your computer and use it in GitHub Desktop.
Save kazhang/5109883 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MOD 100003
#define N_GROUP 31
#define MAXLINE 1024
#define MAXLEN 10
/*
 * BKDR string hash.
 *
 * Folds every byte of the NUL-terminated string `str` into an accumulator
 * using the multiplier 131 (arithmetic wraps modulo the width of
 * unsigned int), then clears the top bit so the result fits in 31 bits.
 * An empty string hashes to 0.
 */
static unsigned int
BKDRHash(char *str) {
    const unsigned int multiplier = 131;
    unsigned int hash = 0;
    char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        hash = hash * multiplier + *cursor;
    }
    return hash & 0x7FFFFFFF;
}
/*
 * Reduce the BKDR hash of `str` to an index in the range [0, MOD).
 */
static unsigned int
getKey(char *str) {
    unsigned int hash = BKDRHash(str);

    return hash % MOD;
}
/*
 * Partition the lines of `filename` into N_GROUP bucket files.
 *
 * Each line is hashed with BKDRHash() and appended to the file
 * "log-<n>-<gid>", where gid = hash % N_GROUP.  Because the bucket depends
 * only on the line's text, equal lines from two different inputs always
 * end up in buckets with the same gid.
 *
 * The trailing newline is removed before hashing and re-added on output,
 * so a final line that lacks a newline lands in the same bucket as the
 * identical line followed by a newline, and every record in a bucket file
 * is newline-terminated.
 *
 * Bucket files are opened in append mode; the caller is responsible for
 * removing them afterwards.  Input is read in chunks of at most
 * MAXLINE-1 bytes, so a longer line is treated as several records.
 *
 * Any I/O failure is reported on stderr and terminates the program.
 */
static void
split(char *filename,int n) {
    FILE *gfp[N_GROUP];
    char buf[MAXLINE];
    int i;

    FILE *fp = fopen(filename,"r");
    if(fp == NULL) {
        perror(filename);
        exit(EXIT_FAILURE);
    }

    for(i=0;i<N_GROUP;i++) {
        snprintf(buf,sizeof buf,"log-%d-%d",n,i);
        gfp[i] = fopen(buf,"a");
        if(gfp[i] == NULL) {
            perror(buf);
            exit(EXIT_FAILURE);   /* exit() closes the streams opened so far */
        }
    }

    while(fgets(buf,sizeof buf,fp) != NULL) {
        size_t len = strlen(buf);
        /* Hash the text only, never the line terminator. */
        if(len > 0 && buf[len-1] == '\n')
            buf[len-1] = '\0';
        int gid = BKDRHash(buf) % N_GROUP;
        fprintf(gfp[gid], "%s\n", buf);
    }
    fclose(fp);

    for(i=0;i<N_GROUP;i++) {
        /* fclose flushes buffered output; a failure here means lost data. */
        if(fclose(gfp[i]) != 0) {
            perror("fclose");
            exit(EXIT_FAILURE);
        }
    }
}
/*
 * One entry of a hash-table chain.  The string is stored in a flexible
 * array member so that a line of any length fits; nodes must therefore be
 * allocated with room for the text plus its NUL terminator.
 */
typedef struct Node {
    struct Node *nxt;   /* next entry in the same bucket, or NULL */
    char str[];         /* NUL-terminated line text */
}Node;

/* Chained hash table indexed by getKey(); empty buckets are NULL. */
Node *head[MOD];

/* Remove one trailing '\n' from `s`, if present, and return the new length. */
static size_t
strip_newline(char *s) {
    size_t len = strlen(s);

    if(len > 0 && s[len-1] == '\n')
        s[--len] = '\0';
    return len;
}

/* Free every chain in head[] and reset all buckets to NULL. */
static void
release_table(void) {
    int i;

    for(i=0;i<MOD;i++) {
        Node *ptr = head[i];
        while(ptr != NULL) {
            Node *next = ptr->nxt;
            free(ptr);
            ptr = next;
        }
        head[i] = NULL;
    }
}

/*
 * Intersect the two bucket files of group `ng`.
 *
 * Loads every line of "log-1-<ng>" into the head[] hash table, then looks
 * up each line of "log-2-<ng>".  Every line of the second file that is
 * also present in the first is written to `result` followed by a newline
 * (a line repeated in the second file is reported once per occurrence).
 *
 * The table is emptied again before returning, so head[] is all-NULL on
 * exit.  Failure to open a bucket file or to allocate memory is reported
 * on stderr and terminates the program.
 *
 * Returns the number of lines written to `result`.
 */
static int
find(int ng, FILE *result) {
    char buf[MAXLINE];
    FILE *fp;
    int cnt = 0;

    snprintf(buf,sizeof buf,"log-1-%d",ng);
    fp = fopen(buf,"r");
    if(fp == NULL) {
        perror(buf);
        exit(EXIT_FAILURE);
    }
    while(fgets(buf,sizeof buf,fp) != NULL) {
        size_t len = strip_newline(buf);
        unsigned int key = getKey(buf);
        /* Size the node for this exact string: header + text + NUL. */
        Node *ptr = malloc(sizeof *ptr + len + 1);
        if(ptr == NULL) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        memcpy(ptr->str, buf, len + 1);
        ptr->nxt = head[key];
        head[key] = ptr;
    }
    fclose(fp);

    snprintf(buf,sizeof buf,"log-2-%d",ng);
    fp = fopen(buf,"r");
    if(fp == NULL) {
        perror(buf);
        exit(EXIT_FAILURE);
    }
    while(fgets(buf,sizeof buf,fp) != NULL) {
        strip_newline(buf);
        Node *ptr = head[getKey(buf)];
        while(ptr != NULL && strcmp(ptr->str, buf) != 0)
            ptr = ptr->nxt;
        if(ptr != NULL) {
            cnt++;
            fprintf(result,"%s\n",ptr->str);
        }
    }
    fclose(fp);

    release_table();
    return cnt;
}
/*
 * Usage: <prog> log1 log2
 *
 * Writes every line that occurs in both input files to "common_names",
 * followed by a "total = <count>" line.
 *
 * Both inputs are first split into N_GROUP temporary bucket files
 * ("log-1-<i>" and "log-2-<i>"); matching buckets are then intersected
 * one pair at a time so only one bucket has to be held in memory.  The
 * temporary files are deleted before the program returns.
 *
 * Exit status is EXIT_SUCCESS on success and EXIT_FAILURE on a usage
 * error or when the result file cannot be written.
 */
int main(int argc, char **argv)
{
    char buf[MAXLINE];
    int status = EXIT_SUCCESS;
    int i;

    if(argc != 3)
    {
        fprintf(stderr,"usage: %s log1 log2\n",argv[0]);
        return EXIT_FAILURE;
    }

    /* head[] has static storage duration, so it starts out all-NULL. */
    split(argv[1],1);
    split(argv[2],2);

    FILE *result = fopen("common_names","w");
    if(result == NULL) {
        perror("common_names");
        status = EXIT_FAILURE;
    } else {
        int cnt = 0;
        for(i=0;i<N_GROUP;i++)
            cnt += find(i, result);
        fprintf(result,"total = %d\n",cnt);
        if(fclose(result) != 0) {
            perror("common_names");
            status = EXIT_FAILURE;
        }
    }

    /* Always clean up the bucket files, even if writing the result failed.
     * remove() is the standard-C call declared in <stdio.h>. */
    for(i=0;i<N_GROUP;i++) {
        snprintf(buf,sizeof buf,"log-1-%d",i);
        remove(buf);
        snprintf(buf,sizeof buf,"log-2-%d",i);
        remove(buf);
    }
    return status;
}
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Parse `text` as a non-negative decimal integer.
 * Stores the value in *out and returns 0 on success; returns -1 when the
 * text is empty, has trailing garbage, is negative, or is out of range.
 */
static int
parse_count(const char *text, long *out)
{
    char *end;

    errno = 0;
    long value = strtol(text, &end, 10);
    if(errno != 0 || end == text || *end != '\0' || value < 0)
        return -1;
    *out = value;
    return 0;
}

/*
 * Usage: <prog> <number> <length>
 *
 * Appends <number> lines to "data.in", each consisting of <length>
 * random lowercase letters followed by a newline.  The generator is
 * seeded from the current time, so the output differs between runs.
 *
 * Exit status is EXIT_SUCCESS on success and EXIT_FAILURE on bad
 * arguments or an I/O error.
 */
int main(int argc,char **argv)
{
    long n, len;
    long i, j;

    if(argc<3)
    {
        fprintf(stderr,"usage:%s <number> <length>\n",argv[0]);
        return EXIT_FAILURE;
    }
    if(parse_count(argv[1], &n) != 0 || parse_count(argv[2], &len) != 0)
    {
        fprintf(stderr,"%s: <number> and <length> must be non-negative integers\n",argv[0]);
        return EXIT_FAILURE;
    }

    FILE *fp=fopen("data.in","a");
    if(fp == NULL)
    {
        perror("data.in");
        return EXIT_FAILURE;
    }

    srand((unsigned int)time(NULL));
    for(i=0;i<n;i++)
    {
        for(j=0;j<len;j++)
            fputc('a'+rand()%26,fp);
        fputc('\n',fp);
    }

    /* fclose flushes the buffered output; check it so a full disk is noticed. */
    if(fclose(fp) != 0)
    {
        perror("data.in");
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment