Skip to content

Instantly share code, notes, and snippets.

@aphyr
Created August 8, 2016 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save aphyr/ed39e43dda1b1bf76e0ee46ecd8b21a6 to your computer and use it in GitHub Desktop.
Save aphyr/ed39e43dda1b1bf76e0ee46ecd8b21a6 to your computer and use it in GitHub Desktop.
import java.io.File;
import java.io.IOException;
import java.util.Random;
import java.util.Scanner;
/**
* Boggle.
*
* @author Kyle Kingsbury
*
*/
public class Boggle {
public static int DEFAULT_SIZE = 8; // Default dimensions of grid
public static int MIN_PREFIX_LENGTH = 3; // Smallest number of characters
// in a prefix tree
public static int MIN_WORD_LENGTH = 3; // Lowest number of letters in a
// word.
public static int MAX_WORD_LENGTH = 64; // Highest number of letters in a
// match.
public static int INITIAL_WORD_TARGET_LENGTH = 7; // Reasonable standard
// for
// exhaustive search
private int size; // Dimensions of grid
private Cell[][] grid; // Boggle grid
private Random generator = new Random();
/**
* Class Cell represents a character Cell in the grid.
*/
private class Cell {
boolean visited;
char c;
public Cell(char c) {
this.c = c;
visited = false;
}
public String toString() {
return Character.toString(c);
}
}
/**
* Class Dictionary contains references for checking the word status of a
* given target
*/
private static class Dictionary {
public BinarySearchTree index;
/**
* Prefix is an array of binary search trees where the tree located at
* element i in the array contains all substrings of length i at index 0
* in the dictionary file. Hence, the word 'catch' is stored in [1] as
* 'c', [2] as 'ca', [3] as 'cat', etc. This allows the search function
* to abort search paths which can never result in words.
*/
public BinarySearchTree[] prefix;
/**
* Constructs a dictionary
*
* @param file File to parse
* @throws IOException If file can't be read.
*/
public Dictionary(File file) throws IOException {
index = new BinarySearchTree();
prefix = new BinarySearchTree[MAX_WORD_LENGTH];
// Initialize prefix BSTs
for (int i = 0; i < prefix.length; i++) {
prefix[i] = new BinarySearchTree();
}
String word;
Scanner scanner = new Scanner(file);
scanner.useDelimiter("[\\n]"); // Separate tokens using newlines
// Add words to indexes
word: while (scanner.hasNext()) {
// Get word
word = scanner.next();
if (word.length() > MAX_WORD_LENGTH) {
// Word is too long, skip
continue word;
}
// Lowercase
word = word.toLowerCase();
// Add word to primary dictionary
index.add(word);
// Add word prefixes to prefix dictionaries
for (int i = MIN_PREFIX_LENGTH; i < prefix.length && i < word.length(); i++) {
prefix[i].add(word.substring(0, i));
}
}
}
}
/**
* Creates a new boggle board of the default dimensions, filled with random
* characters.
*
*/
public Boggle() {
generator = new Random();
size = DEFAULT_SIZE;
grid = new Cell[size][size];
for (int i = 0; i < grid.length; i++) {
for (int j = 0; j < grid[i].length; j++) {
grid[i][j] = new Cell(randomChar());
}
}
}
/**
* Searches the board for matches in the given Dictionary. Prints matches as
* they are found.
*
* @param dictionary The dictionary to check against.
* @return A tree of found words.
*/
public BinarySearchTree findWords(Dictionary dictionary) {
BinarySearchTree matches = new BinarySearchTree();
StringBuffer charBuffer = new StringBuffer();
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
findWords(dictionary, matches, i, j, charBuffer);
}
}
return matches;
}
/**
* Searches the board for matches in the given BinarySearchTree. Prints
* matches as they are found.
*
* Note that using class Dictionary is preferred instead.
*
* @param index The dictionary to check against.
* @return A tree of found words.
*/
public BinarySearchTree findWords(BinarySearchTree index) {
BinarySearchTree matches = new BinarySearchTree();
StringBuffer charBuffer = new StringBuffer();
/*
* It's possible to perform an exhaustive search for all words in the
* board of a certain length or below. Searches for candidate words at
* greater depths take exponentially larger numbers of recursive calls.
* Hence, we search exhaustively below a certain target depth. When that
* search has completed, we incrementally search for longer words.
*/
int min_word_length = MIN_WORD_LENGTH;
// Start with lower-length words, and then work upwards
for (int max_word_length = INITIAL_WORD_TARGET_LENGTH; max_word_length <= MAX_WORD_LENGTH; max_word_length++) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
findWords(index, matches, min_word_length, max_word_length,
i, j, charBuffer);
}
}
// Having completed the exhaustive pass, we set min_target_depth to
// max_target_depth to avoid repeat word checks
if (min_word_length == 0) {
min_word_length = max_word_length;
}
min_word_length++;
}
return matches;
}
/**
* Recursively searches a Cell for words
*
* @param dictionary Dictionary of valid words
* @param matches Words found in the board
* @param i Vertical index of word in board
* @param j Horizontal index of word in board
* @param charBuffer Characters this method has traversed
*/
private void findWords(Dictionary dictionary, BinarySearchTree matches,
int i, int j, StringBuffer charBuffer) {
// Whether or not this is a valid beginning to a word
boolean valid_word_prefix = true;
// Add this cell to character buffer
charBuffer.append(grid[i][j].c);
// Mark cell as visited
grid[i][j].visited = true;
// Set word
String word = new String(charBuffer);
// Word candidate
if (charBuffer.length() >= MIN_WORD_LENGTH
&& charBuffer.length() <= MAX_WORD_LENGTH) {
// The number of characters we have is within the acceptable range
// for a word.
// Check word for word-i-ness.
if (dictionary.index.find(word)) {
// This is a word
if (matches.add(word)) {
// We haven't seen this word before
System.out.println(matches.size() + ": " + word);
}
}
}
/*
* Check word for word-ability Essentially, we look to see if this word
* *might* be a word later by checking if it exists in a prefix tree,
* built from progressively larger initial substrings of words.
*/
if (charBuffer.length() >= MIN_PREFIX_LENGTH
&& charBuffer.length() < MAX_WORD_LENGTH) {
// System.out.println("Testing word " + word + " of length "
// + charBuffer.length() + " for prefix.");
// We can test this word to see if it's a prefix
if (!dictionary.prefix[charBuffer.length()].find(word)) {
// This is not a valid word prefix
// System.out.println("Not a potential word");
valid_word_prefix = false;
}
}
if (valid_word_prefix && charBuffer.length() < MAX_WORD_LENGTH) {
// This is a valid word prefix (will be a word later), and adding
// another letter will not exceed the target maximum word length.
// Check recursively.
checkI: for (int iTarget = i - 1; iTarget <= i + 1; iTarget++) {
if (iTarget < 0 || iTarget >= size) {
// index iTarget is out of bounds
continue checkI;
}
checkJ: for (int jTarget = j - 1; jTarget <= j + 1; jTarget++) {
if (jTarget < 0 || jTarget >= size) {
// index jTarget is out of bounds
continue checkJ;
}
if (grid[iTarget][jTarget].visited == false) {
// Target Cell has not been visited
findWords(dictionary, matches, iTarget, jTarget, charBuffer);
}
}
}
}
// Since we're returning control, this Cell is no longer visited
grid[i][j].visited = false;
// Remove Cell from character stack
charBuffer.deleteCharAt(charBuffer.length() - 1);
}
/**
* Recursively searches a Cell for words
*
* Note that using class Dictionary is preferred instead.
*
* @param index BinarySearchTree of valid words
* @param matches Words found in the board
* @param min_word_length Minimum size of a word candidate
* @param max_word_length Maximum size of a word candidate
* @param i Vertical index of word in board
* @param j Horizontal index of word in board
* @param charBuffer Characters this method has traversed
*/
private void findWords(BinarySearchTree index, BinarySearchTree matches,
int min_word_length, int max_word_length, int i, int j,
StringBuffer charBuffer) {
// Add this Cell to character buffer
charBuffer.append(grid[i][j].c);
// Mark Cell as visited
grid[i][j].visited = true;
// Word candidate
if (charBuffer.length() >= min_word_length
&& charBuffer.length() <= max_word_length) {
// The number of characters we have is within the acceptable range
// for a word.
// Get current word
String word = new String(charBuffer);
// Check word for word-i-ness.
if (index.find(word)) {
// This is a word
if (matches.add(word)) {
System.out.println(matches.size() + ": " + word);
}
}
}
if (charBuffer.length() < max_word_length) {
// Adding another letter will not exceed the target maximum word
// length. Check recursively.
checkI: for (int iTarget = i - 1; iTarget <= i + 1; iTarget++) {
if (iTarget < 0 || iTarget >= size) {
// index iTarget is out of bounds
continue checkI;
}
checkJ: for (int jTarget = j - 1; jTarget <= j + 1; jTarget++) {
if (jTarget < 0 || jTarget >= size) {
// index jTarget is out of bounds
continue checkJ;
}
if (grid[iTarget][jTarget].visited == false) {
// Target Cell has not been visited
findWords(index, matches, min_word_length,
max_word_length, iTarget, jTarget, charBuffer);
}
}
}
}
// Since we're returning control, this Cell is no longer visited
grid[i][j].visited = false;
// Remove Cell from character stack
charBuffer.deleteCharAt(charBuffer.length() - 1);
}
/**
* Returns a human-readable representation of the board.
*/
public String toString() {
String str = "|";
for (int j = 0; j < size * 4 - 1; j++) {
str = str + '-';
}
str = str + "|\n";
for (int i = 0; i < size; i++) {
str = str + "| ";
for (int j = 0; j < size; j++) {
str = str + grid[i][j].c;
str = str + " | ";
}
str = str + "\n|";
for (int j = 0; j < size * 4 - 1; j++) {
str = str + '-';
}
str = str + "|\n";
}
return str;
}
// Main methods
public static void main(String[] args) {
try {
System.out.println("Done.");
// Generate boggle board
Boggle board = new Boggle();
// Display board
System.out.println(board);
// Get dictionary
System.out.print("Building dictionary... ");
long startIndexTime = System.currentTimeMillis();
//BinarySearchTree index = buildIndex(new File(args[0]));
Dictionary dictionary = new Dictionary(new File(args[0]));
System.out.println("Done.");
// Search board with dictionary
long startSearchTime = System.currentTimeMillis();
//board.findWords(index);
board.findWords(dictionary);
long finishTime = System.currentTimeMillis();
System.out.println("Indexed dictionary in " + ((startSearchTime - startIndexTime) / 1000.0) + " seconds.");
System.out.println("Searched board in " + ((finishTime - startSearchTime) / 1000.0) + " seconds.");
System.out.println("Total running time: " + ((finishTime - startIndexTime) / 1000.0) + " seconds.");
} catch (IOException e) {
System.out.println("Not a dictionary file: " + args[0]);
return;
} catch (IndexOutOfBoundsException e) {
System.out.println("Usage: Boggle [dictionary]");
}
}
// Helper methods
/**
* Builds an index file from disk Note that using class Dictionary is
* preferred istead.
*/
private static BinarySearchTree buildIndex(File file) throws IOException {
BinarySearchTree index = new BinarySearchTree();
/*
* With help from Professor David Liben-Nowell, CS127
*/
Scanner scanner = new Scanner(file);
scanner.useDelimiter("[\\n]"); // Separate tokens using newlines
while (scanner.hasNext()) {
index.add(scanner.next().toLowerCase());
}
return index;
}
/**
* Returns a random lowercase character from a to z roughly matching the
* frequency distribution of english text.
*/
private char randomChar() {
float i = generator.nextInt(1000000);
i = i / 1000000;
if (i < .08167) {
return 'a';
}
if (i < .09659) {
return 'b';
}
if (i < .12441) {
return 'c';
}
if (i < .16694) {
return 'd';
}
if (i < .29396) {
return 'e';
}
if (i < .31624) {
return 'f';
}
if (i < .33639) {
return 'g';
}
if (i < .39733) {
return 'h';
}
if (i < .46699) {
return 'i';
}
if (i < .46852) {
return 'j';
}
if (i < .47624) {
return 'k';
}
if (i < .51649) {
return 'l';
}
if (i < .54055) {
return 'm';
}
if (i < .60804) {
return 'n';
}
if (i < .68311) {
return 'o';
}
if (i < .70240) {
return 'p';
}
if (i < .70335) {
return 'q';
}
if (i < .76322) {
return 'r';
}
if (i < .82649) {
return 's';
}
if (i < .91705) {
return 't';
}
if (i < .94463) {
return 'u';
}
if (i < .95441) {
return 'v';
}
if (i < .97801) {
return 'w';
}
if (i < .97951) {
return 'x';
}
if (i < .99925) {
return 'y';
}
if (i < 1) {
return 'z';
} else {
// Failsafe
return 'e';
}
}
}
/**
* Binary search tree for string terms
* Implements AVL balancing algorithm as described in
* Koffman and Wolfgang: Objects, Abstraction, Data Structures, and Design
*
* @author Kyle Kingsbury
*/
import java.io.*;
public class BinarySearchTree implements Serializable {
private static final long serialVersionUID = 2L;
protected Node root; // Root of the tree
private int size; // Number of elements in the tree;
// Recursive method persistance variables
// This is why methods should have multiple return values.
boolean add_success; // A node was successfully added to the tree
boolean add_height_increase; // The height of the tree has increased.
private class Node implements Serializable {
private static final long serialVersionUID = 2L;
public String term;
public int balance;
/*
* The balance of a node is the difference of the height of its
* sub-trees. If the left sub-tree is larger, balance is negative. If
* the right is larger, the balance is zero.
*/
public Node left;
public Node right;
/**
* Node constructor
*
* @param term Term this node represents
*/
public Node(String term) {
this.term = term;
balance = 0;
left = null;
right = null;
}
public String toString() {
String str = term + "(" + balance + ")";
if (left != null || right != null) {
str = str + "[";
if (left != null) {
str = str + left.toString();
}
str = str + ", ";
if (right != null) {
str = str + right.toString();
}
str = str + ']';
}
return str;
}
}
/**
* BinarySearchTree Constructor
*
*/
public BinarySearchTree() {
root = null;
size = 0;
}
/**
* Add term and index to tree. If term exists, adds the index to the
* existing term. If term does not exist, creates a new node.
*
* @param term The term
*/
public boolean add(String term) {
// Perform recursive search
add_success = false;
add_height_increase = false;
root = add(term, root);
if (add_success) {
size += 1;
}
return add_success;
}
/**
* Recursive helper for add
*
* @param term Term
* @param node Tree to add in.
* @return Root node of the updated subtree.
*/
private Node add(String term, Node node) {
// Base case: This node is undefined
if (node == null) {
// Add node
add_success = true;
return new Node(term);
}
// Compute relative location of term to this node
int bias = term.compareTo(node.term);
// Base case: This is the correct term.
if (bias == 0) {
// Node is already defined.
return node;
}
if (node.left == null && node.right == null) {
// The next recursion will create a new child and increase the
// height of this tree. Since there are no children, and we haven't
// returned already, the term does not exist in this tree and will
// be added.
add_height_increase = true;
}
// Recursive case: What subtree does the term belong in?
if (bias < 0) {
// Term is in the left subtree
// Add term to left subtree
node.left = add(term, node.left);
if (add_height_increase) {
// The left subtree has increased in height
node.balance -= 1;
if (node.balance <= -2) {
// Rebalance
if (node.left.balance >= 1) {
// CASE: left-right imbalance
// Adjust balances
if (node.left.right.balance <= -1) {
// CASE: left-right-left insertion
node.left.balance = 0;
node.left.right.balance = 0;
node.balance = 1;
} else {
// CASE: left-right-right insertion
node.left.balance = -1;
node.left.right.balance = 0;
node.balance = 0;
}
// Perform rotation
node.left = rotateLeft(node.left);
// Right rotation is done for both cases
} else {
// CASE: left-left imbalance
// Adjust balances
node.left.balance = 0;
node.balance = 0;
// Right rotation is done for both trees
}
// Check to see if this operation balanced the tree
if (node.left.balance == 0) {
// No further rebalancing is necessary.
// The addition resulted in a more balanced tree. Since
// balancing operations always result in a tree that is
// equal to or less than the height of the original,
// there is no need to rebalance further.
add_height_increase = false;
}
return rotateRight(node);
}
}
} else {
// Term is in the right subtree
// Add term to right subtree
node.right = add(term, node.right);
if (add_height_increase) {
// The right subtree has increased in height
node.balance += 1;
if (node.balance >= 2) {
// Rebalance
if (node.right.balance <= -1) {
// CASE: right-left imbalance
// Adjust balances
if (node.right.left.balance >= 1) {
// CASE: right-left-right insertion
node.right.balance = 0;
node.right.left.balance = 0;
node.balance = -1;
} else {
// CASE: right-left-left insertion
node.right.balance = 1;
node.right.left.balance = 0;
node.balance = 0;
}
// Perform rotation
node.right = rotateRight(node.right);
// Left rotation is done for both cases
} else {
// CASE: right-right imbalance
// Adjust balances
node.right.balance = 0;
node.balance = 0;
// Left rotation is done for both trees
}
// Check to see if this operation balanced the tree
if (node.right.balance == 0) {
// No further rebalancing is necessary.
// The addition resulted in a more balanced tree. Since
// balancing operations always result in a tree that is
// equal to or less than the height of the original,
// there is no need to rebalance further.
add_height_increase = false;
}
return rotateLeft(node);
}
}
}
// No changes
return node;
}
/**
* Finds the given term in the tree.
*
* @param term The term to search for
* @return True if term is defined. Otherwise, returns false.
*/
public boolean find(String term) {
if (root == null) {
// Empty tree
return false;
}
// Call recursive helper
return findNode(term, root);
}
/**
* Finds the given term in the tree.
*
* @param term The term to search for
* @param node The current node we are checking
* @return True if node exists, otherwise false.
*/
private boolean findNode(String term, Node node) {
// Recursive case: Check the appropriate subtree
if (term.compareTo(node.term) < 0) {
// Term is in the left subtree
if (node.left == null) {
// Left subtree is null
return false;
} else if (node.left.term.equals(term)) {
// Left node contains term
return true;
} else {
// Left subtree is non-null, recursively find
return findNode(term, node.left);
}
} else {
// Term is in the right subtree
if (node.right == null) {
// Right subtree is null
return false;
} else if (node.right.term.equals(term)) {
// Right node contains term
return true;
} else {
// Right subtree is non-null, recursively find
return findNode(term, node.right);
}
}
}
/**
* @return Number of elements in the tree.
*/
public int size() {
return size;
}
public String toString() {
if (root == null) {
// Base case: empty tree
return "[]";
}
return root.toString();
}
// Private helper methods
/**
* Rotates a given root node of a tree right.
*
* @param node Root node of the tree to rotate.
* @return The root of the altered tree.
*/
private Node rotateRight(Node node) {
Node temp = node.left;
node.left = temp.right;
temp.right = node;
return temp;
}
/**
* Rotates a given root node of a tree left;
*
* @param node Root node of the tree to rotate.
* @return The root of the altered tree.
*/
private Node rotateLeft(Node node) {
Node temp = node.right;
node.right = temp.left;
temp.left = node;
return temp;
}
}
Kyle Kingsbury
First, please only submit one copy - not a folder in a folder. I do not know which one to grade, so I picked one and hoped it was the better version. If it was an accident, let me know somehow.
There is no need to overkill an assignment - in this one, you created a Dictionary class, a balanced BST, multiple methods for finding words, an Index... It all becomes very complex to handle, and especially complex for another person to understand. Sometimes, you just have to make a choice in terms of implementation - it'll save you coding time, and others the time of understanding what you did. Also, simpler is often times better. For example, instead of creating a whole new Dictionary class, you could have provided some way to look up parts of a String in your BST class. That way, another person only has to look at one new method, rather than a whole class.
Find a good mix between optimization, style, and simplicity. Somewhere in there is code that is efficient, while at the same time easy to understand.
On top of all that, since you made Dictionary private, I have to use your main to run your optimized version. What if I wanted to port my Boggle solver to another program? -4
Doing everything you did takes skill. A balanced tree is difficult to make. Prefix checking is implemented in Dictionary. +5
In the same vein, there's no need to comment every single line of code. Comment enough that someone can understand what blocks of code can do. If a line of code is simple, no need to comment (and clutter up the page). Ex: line 92 of Boggle.java doesn't need to be commented, since (via the method name) it's obvious what is being done.
In the directions, it says your program should print out all words less than 9 letters - which includes one or two letter words. In other words, MIN_WORD_LENGTH should be 1. -1
---
Final grade: 50/50
Kyle Kingsbury
PS5
Inserting a sorted sequence into a binary search tree without any balancing
operations ensures that the tree will be linear, i.e. each node has only one
child. Hence, operations are O(n) instead of O(log n). This is because for
every term a[n] in the input sequence, a[n+1] > a[n]. Hence, a[n] > a[x] for
all x < n. Hence, an insert operation will *always* insert to the right of
every existing node, and produce a linear tree.
My BinarySearchTree implements a balanced AVL tree, without keeping track of
the heights of the respective subtrees. It does this through some clever case
analysis. The difference in speed as compared to tracking the heights is
negligible, and this implementation saves memory.
INITIAL APPROACH:
Searching directly for words of all lengths is a process that takes, on
average, the same time to locate all terms. Hence, the time between locating
terms is relatively high. Because we are attempting to locate the maximum
number of terms within the given time limit, my implementation performs an
exhaustive search for all terms less than a constant length (6 characters).
This search can be performed in only a few seconds. Because smaller words are
far more likely to occur in the board than longer ones, this search also
returns a large proportion of the potential matches.
The remaining time is given to searching for terms of increasing length. While
this implementation repeats recursive calls, it is actually much faster than
the "more efficient" general case search. Without optimizing for shorter
terms, a 1-minute search on test boards returns approximately 18% of the words
for those boards. With the search biased for shorter terms, it locates
approximately 98% of the terms within one minute.
The initial maximum word length can be adjusted depending on the speed of the
computer to yield optimum results. 6 characters is a conservative estimate,
and 7 performs marginally better on my (fast) computer.
REFINED APPROACH:
At every recursive search call, we can assert that the current string of
characters is not a valid word candidate, and hence any further searching is
useless. To do this, we build an array of word prefixes of length 1, 2, 3,
etc, and fold it into the class Dictionary which also contains the complete
dictionary tree. This means that, at worse case, (8-1)*n extra recursive calls
are made for a string of length n, instead of roughly 8^n calls. To limit the
time spent building the index, a minimum length is established for a prefix.
This approach solves the entire search tree, with no word length limitations,
in about .273 seconds. The search function is O(1) with respect to the word
length limit where n is greater than the length of the longest word in the
dictionary, as opposed to an exponential function. This method results in
additional preprocessing time and higher memory utilization due to the extra
prefix BSTs, but much faster search execution.
---
Note that the original algorithm was limited to words of 9 characters or less.
The optimized algorithm was limited only by the dimensions of the board.
Time to search boards (seconds):
Parse Search Total
Original: 1.864 172.653 174.517
Optimized: 2.849 0.265 3.114
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment