Skip to content

Instantly share code, notes, and snippets.

@JarrettBillingsley
Created November 18, 2012 10:47
Show Gist options
  • Save JarrettBillingsley/4104522 to your computer and use it in GitHub Desktop.
Save JarrettBillingsley/4104522 to your computer and use it in GitHub Desktop.
module anagramgen;
import tango.core.Array;
import Path = tango.io.Path;
import tango.io.Stdout;
import tango.io.stream.DataFile;
import tango.io.stream.TextFile;
const ulong[] primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101];
const Min = 12;
const Max = 30;
ulong toRepr(char[] w)
{
ulong ret = 1;
foreach(c; w)
ret *= primes[c - 'a'];
return ret;
}
struct Word
{
char[6] buf;
size_t len;
ulong repr;
static Word opCall(char[] w)
{
Word ret = void;
ret.buf[0 .. w.length] = w;
ret.len = w.length;
ret.repr = toRepr(w);
return ret;
}
char[] w()
{
return buf[0 .. len];
}
}
Word[] readWords(char[] file)
{
Word[] ret;
scope f = new TextFileInput(file);
foreach(line; f)
ret ~= Word(line);
return ret;
}
size_t[][ulong] findAnagrams(Word[] words)
{
size_t[Max] anagrams;
size_t anidx = 0;
bool addAn(size_t a)
{
if(anidx >= anagrams.length)
return false;
anagrams[anidx++] = a;
return true;
}
size_t[][ulong] table;
mainLoop: foreach(ref w; words)
{
if(w.len < 6 || w.repr in table)
continue;
anidx = 0;
foreach(i, ref other; words)
if(w.repr % other.repr == 0)
if(!addAn(i))
continue mainLoop;
if(anidx > Min)
table[w.repr] = anagrams[0 .. anidx].dup;
}
return table;
}
bool[] haveGarbage(Word[] words, size_t[][ulong] table)
{
auto ret = new bool[](words.length);
foreach(anagram; table)
foreach(w; anagram)
ret[w] = true;
if(ret.find(false) == ret.length)
return null;
else
return ret;
}
void writeOptimizedWordlist(Word[] words, bool[] used, char[] filename)
{
scope outFile = new TextFileOutput(filename);
bool first = true;
foreach(i, flag; used)
{
if(flag)
{
if(first)
first = false;
else
outFile.newline;
outFile(words[i].w);
}
}
outFile.flush().close();
}
void writeDataFile(Word[] words, size_t[][ulong] table, char[] filename)
{
scope outFile = new DataFileOutput(filename);
// char[] table
outFile.int16(cast(short)words.length);
foreach(ref w; words)
{
outFile.int8(cast(byte)w.w.length);
outFile.write(w.w);
}
// Anagram table
outFile.int16(cast(short)table.length);
foreach(anagram; table)
{
outFile.int8(cast(byte)anagram.length);
foreach(idx; anagram)
outFile.int16(cast(short)idx);
}
outFile.flush().close();
}
void main()
{
char[] inFile = "anagrams.txt";
Path.PathParser pp;
pp.parse(inFile);
auto words = readWords(inFile);
auto table = findAnagrams(words);
if(auto used = haveGarbage(words, table))
{
Stdout.formatln("File {} has unused words. Optimizing...", inFile).flush();
auto outFileName = pp.path ~ pp.name ~ "_opt" ~ pp.suffix;
writeOptimizedWordlist(words, used, outFileName);
words = readWords(outFileName);
table = findAnagrams(words);
}
else
Stdout.formatln("File {} is already optimal.", inFile).flush();
Stdout.formatln("Writing data file...").flush();
writeDataFile(words, table, pp.path ~ pp.name ~ ".dat");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment