Skip to content

Instantly share code, notes, and snippets.

@allbinmani
Last active April 17, 2018 11:02
Show Gist options
  • Save allbinmani/c1d5362cb88fb354eb9a4093b18853ca to your computer and use it in GitHub Desktop.
Save allbinmani/c1d5362cb88fb354eb9a4093b18853ca to your computer and use it in GitHub Desktop.
Markov in Java
import java.nio.file.Path;
import java.util.List;
import java.util.Random;
import java.util.ArrayList;
public class Markov {
/** Regular expression for breaking up words. */
private static final String WORD_REGEX = "(?<=\\w\\W)";
/** Regular expression for getting individual characters. */
private static final String CHAR_REGEX = "(?<=.)";
public static void main(String[] args) {
StringChain sc = new StringChain(2);
List<String> words = new ArrayList<String>();
words.add("markov");
words.add("chain");
words.add("how");
words.add("are");
words.add("markov");
words.add("chains");
words.add("built");
words.add("you");
words.add("ask?");
words.add("later");
words.add("markov");
words.add("chains");
words.add("will");
words.add("predict");
words.add("text");
sc.addItems(words.iterator());
System.out.println( String.join(" ", sc.generate(10, new Random()) ) );
}
private static void addFile(StringChain chain, String regex, Path file) {
}
}
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.stream.Collectors;
public class StringChain {
private HashMap<String, List<Suffix>> map;
private int ORDER;
public StringChain(int order){
ORDER = order;
map = new HashMap<String, List<Suffix>>();
}
class Prefix {
public String prefix;
Prefix(List<String> pfx) {
prefix = String.join("|+|", pfx);
}
}
class Suffix {
public String suffix;
public int count;
Suffix(String sfx) {
suffix = sfx;
count = 1;
}
void increaseCount() {
count++;
}
}
private List<String> initialPrefix() {
ArrayList<String> prefix = new ArrayList<String>(ORDER);
for(int i=0; i < ORDER; i++) {
prefix.add("<BLANK>");
}
return prefix;
}
public void addItems(Iterator<String>itemIter) {
List<String> prefix = initialPrefix();
while(itemIter.hasNext()) {
String suffix = itemIter.next();
Prefix pfx = new Prefix(prefix);
List<Suffix> sfxs = map.get(pfx.prefix);
if(sfxs == null) {
sfxs = new ArrayList<Suffix>();
map.put(pfx.prefix, sfxs);
sfxs.add(new Suffix(suffix));
} else {
// Suffix exists?
Suffix existing = null;
for(int i=0; i < sfxs.size(); i++) {
if(sfxs.get(i).suffix.equals(suffix)) {
existing = sfxs.get(i);
existing.increaseCount();
break;
}
}
if(existing == null) {
sfxs.add(new Suffix(suffix));
}
}
prefix.remove(0);
prefix.add(suffix);
}
}
public List<String> generate(int n, Random rand) {
List<String> prefix = initialPrefix();
List<String> output = new ArrayList<String>(n);
for(int i=0; i < n; i++) {
String next = null;
Prefix pfx = new Prefix(prefix);
List<Suffix> sfxs = map.get(pfx.prefix);
if(sfxs == null) {
next = "<BLANK>";
} else {
// Pick from sfxs
float sum=0;
float r = rand.nextFloat();
int picked = 0;
for(int j=0; j < sfxs.size(); j++) {
Suffix s = sfxs.get(j);
sum += s.count;
}
for(int j=0; j < sfxs.size(); j++) {
Suffix s = sfxs.get(j);
float prob = (float)s.count / sum;
if(prob > r) {
picked = j;
break;
}
}
next = sfxs.get(picked).suffix;
}
if(next != null) {
output.add(next);
prefix.remove(0);
prefix.add(next);
}
}
return output;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment