Skip to content

Instantly share code, notes, and snippets.

@anastasop
Created November 20, 2012 14:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anastasop/4118242 to your computer and use it in GitHub Desktop.
Save anastasop/4118242 to your computer and use it in GitHub Desktop.
A toy program that generates sentences in English where the first word is one letter long, the second word is two letters long, and so on.
/*
An Amazing sentence in English from
https://plus.google.com/photos/117176908342196183611/albums/5812811361700087857/5812811362489240738
> I do not know where family doctors acquired illegibly
> perplexing handwriting; nevertheless, extraordinary
> pharmaceutical intellectuality counterbalancing
> indecipherability transendentalizes intercommunication's
> incomprehensibleness
The first word is one letter long, the second word is two letters,
the third word three letters long and it goes on like this
until the twentieth word
This is a Java program that tries to generate such sentences.
It reads a large text file and builds a graph in which the nodes
are words and an edge goes from one word to another iff i) the second word
has exactly one more letter and ii) there is a place in the text file
where it directly follows the first word.
After constructing the graph it traverses it to generate sentences.
This version generates all possible sentences, which is a large amount
of data. Maybe I should add a bit of randomness to make it more
practical.
I tried it with the Odyssey and the Bible from Project Gutenberg.
The best I could get was 9/10-word sentences, readable but pretty meaningless.
Odyssey: I to and till night sprang towards Penelope therefore
Bible: O ye his seed shall thrust another brethren therefore understand
Enjoy
Spyros http://twitter.com/anastasop
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
class Word {
String word;
Set<Word> followers = new HashSet<Word>();
Word(String s) {
word = s;
}
void addFollower(Word w) {
followers.add(w);
}
int length() {
return word.length();
}
@Override
public int hashCode() {
return word.hashCode();
}
@Override
public boolean equals(Object obj) {
return word.equals(obj);
}
}
/**
 * Generates "increasing" sentences from a text file: the first word has one
 * letter, the second two letters, and so on.
 *
 * <p>The text is turned into a graph whose nodes are distinct words and whose
 * edges connect a word to every word that directly follows it somewhere in the
 * text and is exactly one letter longer. Every path that starts at a
 * one-letter word and ends at a word without followers is printed, provided it
 * contains at least the requested number of words.
 */
public class IncreasingBlabla {

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the minimum sentence length in words,
     *             {@code args[1]} is the path of the UTF-8 text file to read
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("usage: blabla <min sentence len> <file>");
            System.exit(2);
        }

        int minLength = 0;
        try {
            minLength = Integer.parseInt(args[0]);
        } catch (NumberFormatException e) {
            // Report a bad number like any other usage error instead of a stack trace.
            System.err.println("error: invalid min sentence len: " + args[0]);
            System.exit(2);
        }

        String text = "";
        try {
            text = readText(new File(args[1]));
        } catch (IOException e) {
            System.err.println("error: " + e.getMessage());
            System.exit(2);
        }

        Map<String, Word> words = buildGraph(text);
        for (Word startWord : words.values()) {
            if (startWord.length() == 1) {
                traverseGraph(startWord, minLength, new ArrayList<Word>());
            }
        }
    }

    /**
     * Reads the whole file and decodes it as UTF-8.
     *
     * @param f the file to read
     * @return the decoded file content
     * @throws IOException if the file cannot be opened or read, or is too large
     *                     to fit in a single byte array
     */
    private static String readText(File f) throws IOException {
        long size = f.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("file too large: " + f);
        }
        byte[] content = new byte[(int) size];
        int filled = 0;
        InputStream ist = new FileInputStream(f);
        try {
            // A single read() may return fewer bytes than requested, so keep
            // reading until the buffer is full or the stream ends.
            while (filled < content.length) {
                int n = ist.read(content, filled, content.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            ist.close();
        }
        return new String(content, 0, filled, "UTF-8");
    }

    /**
     * Builds the word graph for the given text.
     *
     * <p>Words are the maximal runs of characters that are neither whitespace
     * nor punctuation. Empty tokens (produced by consecutive delimiters) are
     * skipped so that double spaces, blank lines or {@code \r\n} line endings
     * do not break the link between two adjacent words.
     *
     * @param text the text to analyse
     * @return a map from word text to its graph node; it also contains the
     *         empty-string sentinel node used as predecessor of the first word
     */
    private static Map<String, Word> buildGraph(String text) {
        Map<String, Word> words = new HashMap<String, Word>();
        Word start = new Word("");
        words.put("", start);
        Word prevWord = start;
        for (String wordToken : text.split("[\\s\\p{Punct}]+")) {
            if (wordToken.length() == 0) {
                // Leading delimiter (or empty text) yields one empty token.
                continue;
            }
            Word currWord = words.get(wordToken);
            if (currWord == null) {
                currWord = new Word(wordToken);
                words.put(wordToken, currWord);
            }
            if (currWord.length() - prevWord.length() == 1) {
                prevWord.addFollower(currWord);
            }
            prevWord = currWord;
        }
        return words;
    }

    /**
     * Depth-first walk that prints every maximal path starting at {@code w}.
     *
     * <p>Edges always lead to a strictly longer word, so the recursion cannot
     * loop. A sentence is printed only when the walk reaches a word without
     * followers and the path holds at least {@code minLength} words.
     *
     * @param w            the word to append to the current sentence
     * @param minLength    minimum number of words a sentence needs to be printed
     * @param currSentence the path walked so far; restored to its incoming
     *                     state before this method returns
     */
    static void traverseGraph(Word w, int minLength, List<Word> currSentence) {
        currSentence.add(w);
        if (w.followers.isEmpty()) {
            if (currSentence.size() >= minLength) {
                StringBuilder line = new StringBuilder();
                for (Word cw : currSentence) {
                    line.append(cw.word).append(" ");
                }
                System.out.println(line);
            }
        } else {
            for (Word nextWord : w.followers) {
                traverseGraph(nextWord, minLength, currSentence);
            }
        }
        // Backtrack so the caller can try its next follower.
        currSentence.remove(currSentence.size() - 1);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment