Skip to content

Instantly share code, notes, and snippets.

@justinbowes
Last active August 29, 2015 14:02
Show Gist options
  • Save justinbowes/ed4667adf47bf6ca615e to your computer and use it in GitHub Desktop.
Save justinbowes/ed4667adf47bf6ca615e to your computer and use it in GitHub Desktop.
Markov chain random name generator. Takes a string of one name per line and generates new names. Has some dependencies on the rest of XPL, but it should be pretty straightforward to fix most of them (tips in the comments).
// xpl/res/markov.c. ©2014 Informi Software Inc. Use and modify this file for any purpose without attribution.
#include <ut/uthash.h> // get from https://github.com/troydhanson/uthash
#include <xpl/mem/memory.h> // you can use calloc(sizeof(type), 1) instead of xpl_calloc_type(type),
// strdup instead of xpl_strdup, free instead of xpl_free, etc.
#include "markov.h"
typedef struct link_entry {
char suffix;
size_t frequency;
UT_hash_handle hh;
} link_entry_t;
typedef struct prefix_entry {
char *prefix;
bool novel;
size_t total;
link_entry_t *link_table;
UT_hash_handle hh;
} prefix_entry_t;
struct xpl_markov {
size_t prefix_length;
prefix_entry_t *prefix_table;
};
static void prefix_add_suffix(prefix_entry_t *prefix, const char suffix) {
assert(prefix);
link_entry_t *existing_link;
HASH_FIND(hh, prefix->link_table, &suffix, sizeof(suffix), existing_link);
if (!existing_link) {
// Create and insert
existing_link = xpl_calloc_type(link_entry_t);
existing_link->suffix = suffix;
HASH_ADD(hh, prefix->link_table, suffix, sizeof(suffix), existing_link);
}
++existing_link->frequency;
++prefix->total;
}
static char prefix_get_random_suffix(prefix_entry_t *prefix, const float random_value) {
assert(prefix);
float rf = random_value - truncf(random_value);
size_t index = (size_t)(rf * prefix->total);
size_t index_accum = 0;
link_entry_t *el, *tmp;
HASH_ITER(hh, prefix->link_table, el, tmp) {
index_accum += el->frequency;
if (index_accum > index) return el->suffix;
}
if (!el) return '\0';
return el->suffix;
}
static bool is_delimiter(const char *delimiters, char c) {
return strchr(delimiters, c);
}
xpl_markov_t*xpl_markov_new(const char *data_source, size_t data_length, const char *delimiters, size_t prefix_length) {
xpl_markov_t *markov = xpl_calloc_type(xpl_markov_t);
char current_prefix[prefix_length + 1];
memset(current_prefix, 0, prefix_length + 1);
markov->prefix_length = prefix_length;
const char *data = data_source;
char c;
const char *end = data + data_length;
bool next_is_novel = false;
while (data < end) {
c = (char)tolower(*data);
if (is_delimiter(delimiters, c)) c = '\0';
// Prefix long enough?
if (strlen(current_prefix) == prefix_length) {
prefix_entry_t *existing_prefix;
HASH_FIND(hh, markov->prefix_table, &current_prefix, prefix_length, existing_prefix);
if (!existing_prefix) {
existing_prefix = xpl_calloc_type(prefix_entry_t);
existing_prefix->prefix = xpl_strdup(current_prefix);
existing_prefix->novel = next_is_novel;
HASH_ADD_KEYPTR(hh, markov->prefix_table, existing_prefix->prefix, prefix_length, existing_prefix);
}
prefix_add_suffix(existing_prefix, c);
next_is_novel = true;
}
if (c == '\0') {
// clear prefix
memset(current_prefix, 0, prefix_length + 1);
next_is_novel = false;
} else {
memmove(current_prefix, current_prefix + 1, prefix_length - 1);
current_prefix[prefix_length - 1] = c;
}
++data;
}
return markov;
}
size_t xpl_markov_generate_from_prefix(xpl_markov_t *self,
xpl_markov_random_source *frng,
void *frng_data,
const char *prefix,
char *out,
size_t out_length) {
char *current_prefix = out;
memset(out, 0, out_length);
strncat(out, prefix, strlen(out) - out_length - 1);
prefix_entry_t *existing_prefix;
do {
HASH_FIND(hh, self->prefix_table, current_prefix, self->prefix_length, existing_prefix);
if (!existing_prefix) {
if (strlen(out) == self->prefix_length) out[0] = '\0';
return strlen(out);
}
float rf = frng(frng_data);
char append = prefix_get_random_suffix(existing_prefix, rf);
current_prefix[self->prefix_length] = append;
if (append == '\0') return strlen(out);
++current_prefix;
} while (strlen(out) + 1 < out_length);
return strlen(out);
}
size_t xpl_markov_generate_random(xpl_markov_t *self,
xpl_markov_random_source *frng,
void *frng_data,
bool allow_novel_prefix,
char *out,
size_t out_length) {
float rf;
do {
rf = frng(frng_data);
} while (rf < 0.f || rf >= 1.f);
size_t index = (size_t)(rf * HASH_COUNT(self->prefix_table));
prefix_entry_t *el, *tmp;
size_t i = 0;
HASH_ITER(hh, self->prefix_table, el, tmp) {
if (i == index) {
if ((strchr(el->prefix, ' ') != NULL) || (el->novel && !allow_novel_prefix))
// recurse and pick a different prefix
return xpl_markov_generate_random(self, frng, frng_data, allow_novel_prefix, out, out_length);
return xpl_markov_generate_from_prefix(self, frng, frng_data, el->prefix, out, out_length);
}
++i;
}
assert(false);
out[0] = '\0';
return 0;
}
void xpl_markov_destroy(xpl_markov_t **m) {
assert(m && *m);
xpl_markov_t *self = *m;
prefix_entry_t *pel, *ptmp;
HASH_ITER(hh, self->prefix_table, pel, ptmp) {
link_entry_t *lel, *ltmp;
HASH_ITER(hh, pel->link_table, lel, ltmp) {
HASH_DEL(pel->link_table, lel);
xpl_free(lel);
}
HASH_DEL(self->prefix_table, pel);
xpl_free(pel->prefix);
xpl_free(pel);
}
}
// xpl/res/markov.c. ©2014 Informi Software Inc. Use and modify this file for any purpose without attribution.
#ifndef xpl_res_markov.h
#define xpl_res_markov.h
#include <xpl/platform/platform.h> // includes <sttdef.h>, <stdint.h>, <string.h>, ...
typedef struct xpl_markov xpl_markov_t;
typedef float xpl_markov_random_source (void *); // a function that is passed whatever you supplied as frng_data and
// returns a random float between 0 and 1
xpl_markov_t *xpl_markov_new(const char *data_source, size_t data_length, const char *delimiters, size_t prefix_length);
size_t xpl_markov_generate_from_prefix(xpl_markov_t *self,
xpl_markov_random_source *frng,
void *frng_data,
const char *prefix,
char *out,
size_t out_length);
size_t xpl_markov_generate_random(xpl_markov_t *self,
xpl_markov_random_source *frng,
void *frng_data,
bool allow_novel_prefix,
char *out,
size_t out_length);
void xpl_markov_destroy(xpl_markov_t **m);
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment