Created
October 22, 2011 20:09
-
-
Save edma2/1306431 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
enum { | |
NPREF = 2, /* Number of prefix words */ | |
NHASH = 4093, /* Size of hash table */ | |
MAXGEN = 10000 /* Maximum words generated */ | |
}; | |
char NONWORD[] = "\n"; | |
typedef struct Suffix Suffix; | |
struct Suffix { | |
char *word; | |
Suffix *next; | |
}; | |
typedef struct State State; | |
struct State { | |
char *pref[NPREF]; | |
Suffix *suf; | |
State *next; | |
}; | |
typedef struct String String; | |
struct String { | |
char *str; | |
String *next; | |
}; | |
/* Hash tables */ | |
State *statetab[NHASH]; | |
String *strtab[MAXGEN]; | |
unsigned int hash(char *s[], int n); | |
State *lookup(char *prefix[NPREF], int create); | |
void build(char *prefix[NPREF], FILE *f); | |
void add(char *prefix[NPREF], char *suffix); | |
void addsuffix(State *sp, char *suffix); | |
void generate(int nwords); | |
char *insert(char *str); | |
void cleanup(void); | |
/* | |
* Insert string into hash table if not already present. | |
* Return address of existing string or newly created string. | |
*/ | |
char *insert(char *str) { | |
String *s; | |
int h; | |
h = hash(&str, 1); | |
for (s = strtab[h]; s != NULL; s = s->next) { | |
if (strcmp(s->str, str) == 0) /* found */ | |
return s->str; | |
} | |
s = malloc(sizeof(String)); | |
s->str = strdup(str); | |
s->next = strtab[h]; | |
strtab[h] = s; | |
return s->str; | |
} | |
/* Return the hash value of an array of n strings */ | |
unsigned int hash(char *s[], int n) { | |
enum { MULTIPLIER = 37 }; | |
unsigned int h; | |
unsigned char *p; | |
int i; | |
h = 0; | |
for (i = 0; i < n; i++) { | |
for (p = (unsigned char *)s[i]; *p != '\0'; p++) | |
h = MULTIPLIER * h + *p; | |
} | |
return h % NHASH; | |
} | |
State *lookup(char *prefix[NPREF], int create) { | |
int i, h; | |
State *sp; | |
h = hash(prefix, NPREF); | |
for (sp = statetab[h]; sp != NULL; sp = sp->next) { | |
for (i = 0; i < NPREF; i++) { | |
/* | |
* The hash table for strings gaurantees that identical | |
* strings are stored in the same memory location, thus | |
* a direct comparison is not only correct, but also | |
* faster. | |
*/ | |
if (prefix[i] != sp->pref[i]) | |
break; | |
} | |
if (i == NPREF) | |
return sp; | |
} | |
if (create) { | |
sp = (State *)malloc(sizeof(State)); | |
for (i = 0; i < NPREF; i++) | |
sp->pref[i] = prefix[i]; | |
sp->suf = NULL; | |
sp->next = statetab[h]; | |
statetab[h] = sp; | |
} | |
return sp; | |
} | |
void build(char *prefix[NPREF], FILE *f) { | |
char buf[100], fmt[10]; | |
char *str; | |
/* e.g. fmt = "%99s" */ | |
sprintf(fmt, "%%%ds", sizeof(buf)-1); | |
while (fscanf(f, fmt, buf) != EOF) { | |
str = insert(buf); | |
add(prefix, str); | |
} | |
} | |
void add(char *prefix[NPREF], char *suffix) { | |
State *sp; | |
sp = lookup(prefix, 1); | |
addsuffix(sp, suffix); | |
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0])); | |
prefix[NPREF-1] = suffix; | |
} | |
void addsuffix(State *sp, char *suffix) { | |
Suffix *suf; | |
suf = (Suffix *) malloc(sizeof(Suffix)); | |
suf->word = suffix; | |
suf->next = sp->suf; | |
sp->suf = suf; | |
} | |
void generate(int nwords) { | |
State *sp; | |
Suffix *suf; | |
char *prefix[NPREF], *w; | |
int i, nmatch; | |
int r; | |
for (i = 0; i < NPREF; i++) | |
prefix[i] = NONWORD; | |
for (i = 0; i < nwords; i++) { | |
sp = lookup(prefix, 0); | |
nmatch = 0; | |
for (suf = sp->suf; suf != NULL; suf = suf->next) { | |
if ((r = rand()) % ++nmatch == 0) | |
w = suf->word; | |
} | |
if (strcmp(w, NONWORD) == 0) | |
break; | |
printf("%s\n", w); | |
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0])); | |
prefix[NPREF-1] = w; | |
} | |
} | |
void cleanup(void) { | |
State *sp; | |
Suffix *suf; | |
String *str; | |
void *tmp; | |
int i; | |
for (i = 0; i < NHASH; i++) { | |
for (sp = statetab[i]; sp != NULL; ) { | |
for (suf = sp->suf; suf != NULL; ) { | |
tmp = suf->next; | |
free(suf); | |
suf = (Suffix *)tmp; | |
} | |
tmp = sp->next; | |
free(sp); | |
sp = (State *)tmp; | |
} | |
} | |
for (i = 0; i < MAXGEN; i++) { | |
for (str = strtab[i]; str != NULL; ) { | |
free(str->str); | |
tmp = str->next; | |
free(str); | |
str = (String *)tmp; | |
} | |
} | |
} | |
int main(void) { | |
int i, nwords = MAXGEN; | |
char *prefix[NPREF]; | |
for (i = 0; i < NPREF; i++) | |
prefix[i] = NONWORD; | |
build(prefix, stdin); | |
add(prefix, NONWORD); | |
generate(nwords); | |
cleanup(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment