Skip to content

Instantly share code, notes, and snippets.

@edma2
Created October 22, 2011 20:09
Show Gist options
  • Save edma2/1306431 to your computer and use it in GitHub Desktop.
Save edma2/1306431 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum {
NPREF = 2, /* Number of prefix words */
NHASH = 4093, /* Size of hash table */
MAXGEN = 10000 /* Maximum words generated */
};
char NONWORD[] = "\n";
typedef struct Suffix Suffix;
struct Suffix {
char *word;
Suffix *next;
};
typedef struct State State;
struct State {
char *pref[NPREF];
Suffix *suf;
State *next;
};
typedef struct String String;
struct String {
char *str;
String *next;
};
/* Hash tables */
State *statetab[NHASH];
String *strtab[MAXGEN];
unsigned int hash(char *s[], int n);
State *lookup(char *prefix[NPREF], int create);
void build(char *prefix[NPREF], FILE *f);
void add(char *prefix[NPREF], char *suffix);
void addsuffix(State *sp, char *suffix);
void generate(int nwords);
char *insert(char *str);
void cleanup(void);
/*
* Insert string into hash table if not already present.
* Return address of existing string or newly created string.
*/
char *insert(char *str) {
String *s;
int h;
h = hash(&str, 1);
for (s = strtab[h]; s != NULL; s = s->next) {
if (strcmp(s->str, str) == 0) /* found */
return s->str;
}
s = malloc(sizeof(String));
s->str = strdup(str);
s->next = strtab[h];
strtab[h] = s;
return s->str;
}
/* Return the hash value of an array of n strings */
unsigned int hash(char *s[], int n) {
enum { MULTIPLIER = 37 };
unsigned int h;
unsigned char *p;
int i;
h = 0;
for (i = 0; i < n; i++) {
for (p = (unsigned char *)s[i]; *p != '\0'; p++)
h = MULTIPLIER * h + *p;
}
return h % NHASH;
}
State *lookup(char *prefix[NPREF], int create) {
int i, h;
State *sp;
h = hash(prefix, NPREF);
for (sp = statetab[h]; sp != NULL; sp = sp->next) {
for (i = 0; i < NPREF; i++) {
/*
* The hash table for strings gaurantees that identical
* strings are stored in the same memory location, thus
* a direct comparison is not only correct, but also
* faster.
*/
if (prefix[i] != sp->pref[i])
break;
}
if (i == NPREF)
return sp;
}
if (create) {
sp = (State *)malloc(sizeof(State));
for (i = 0; i < NPREF; i++)
sp->pref[i] = prefix[i];
sp->suf = NULL;
sp->next = statetab[h];
statetab[h] = sp;
}
return sp;
}
void build(char *prefix[NPREF], FILE *f) {
char buf[100], fmt[10];
char *str;
/* e.g. fmt = "%99s" */
sprintf(fmt, "%%%ds", sizeof(buf)-1);
while (fscanf(f, fmt, buf) != EOF) {
str = insert(buf);
add(prefix, str);
}
}
void add(char *prefix[NPREF], char *suffix) {
State *sp;
sp = lookup(prefix, 1);
addsuffix(sp, suffix);
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
prefix[NPREF-1] = suffix;
}
void addsuffix(State *sp, char *suffix) {
Suffix *suf;
suf = (Suffix *) malloc(sizeof(Suffix));
suf->word = suffix;
suf->next = sp->suf;
sp->suf = suf;
}
void generate(int nwords) {
State *sp;
Suffix *suf;
char *prefix[NPREF], *w;
int i, nmatch;
int r;
for (i = 0; i < NPREF; i++)
prefix[i] = NONWORD;
for (i = 0; i < nwords; i++) {
sp = lookup(prefix, 0);
nmatch = 0;
for (suf = sp->suf; suf != NULL; suf = suf->next) {
if ((r = rand()) % ++nmatch == 0)
w = suf->word;
}
if (strcmp(w, NONWORD) == 0)
break;
printf("%s\n", w);
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
prefix[NPREF-1] = w;
}
}
void cleanup(void) {
State *sp;
Suffix *suf;
String *str;
void *tmp;
int i;
for (i = 0; i < NHASH; i++) {
for (sp = statetab[i]; sp != NULL; ) {
for (suf = sp->suf; suf != NULL; ) {
tmp = suf->next;
free(suf);
suf = (Suffix *)tmp;
}
tmp = sp->next;
free(sp);
sp = (State *)tmp;
}
}
for (i = 0; i < MAXGEN; i++) {
for (str = strtab[i]; str != NULL; ) {
free(str->str);
tmp = str->next;
free(str);
str = (String *)tmp;
}
}
}
int main(void) {
int i, nwords = MAXGEN;
char *prefix[NPREF];
for (i = 0; i < NPREF; i++)
prefix[i] = NONWORD;
build(prefix, stdin);
add(prefix, NONWORD);
generate(nwords);
cleanup();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment