Skip to content

Instantly share code, notes, and snippets.

@seanstickle
Created December 31, 2013 21:32
Show Gist options
  • Save seanstickle/8202428 to your computer and use it in GitHub Desktop.
Save seanstickle/8202428 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# -------------------------------------------------------------------
#
# DIRECTORY DESCENDING WORD LADDER
#
# -------------------------------------------------------------------
#
# (c) 2012 Sean Stickle, sean.stickle@gmail.com
#
# This script takes two words, FROM and TO, and makes a connecting
# series of words which vary, at each step, by only one letter.
#
# We do this in the following way:
#
# 1. Create a directory named after the FROM word, and change
# into that directory.
# 2. Grep the wordlist for all words that are Hamming distance
# of 1 from the current directory's name.
# 3. Create subdirectories named after each of those words.
# 4. Check to see if there are any directory paths that terminate
# with the TO word.
# 5. If not, repeat step 2 through 4 until a directory path
# that terminates in the TO word is found.
# 6. Print out the path.
# 7. Terminate.
#
# If there are no matches, the script exits with return code of 1.
#
# WARNING: This script abuses the file system to accomplish a
# fairly mundane data processing task, and may cause depletion of
# your inodes.
#
# -------------------------------------------------------------------
#
# EXAMPLE RUNS (on a 2009 MacBook Pro, SATA hard drive):
#
# dog -> dig Results: /dog/dig
# Time: 1 sec
#
# dog -> dad Results: /dog/dag/dad
# Time: 11 secs
#
# dog -> cat Results: /dog/cog/cag/cat
# Time: 6 mins, 3 secs
#
# lead -> gold Results: /lead/load/goad/gold
# Time: 1 min, 19 secs
#
# word -> test Results: /word/wort/tort/tost/test
# Time: 50 mins, 23 secs
#
# -------------------------------------------------------------------
# Set WORDLIST to the location of the Unix words file, or
# whatever similarly-formatted word file of your own.
WORDLIST=/usr/share/dict/words
# Where the script is running from
HOME=$(pwd)
# FROM is the source word for the ladder. TO is the target
# word for the ladder. They must be of the same length.
FROM=$1
TO=$2
# Check that we have both a FROM and a TO word
if [ -z $FROM ] || [ -z $TO ]; then
echo "You need to enter both a FROM and a TO string."
exit
fi
# Word ladders are for strings of the same length.
if [ ! ${#FROM} == ${#TO} ]; then
echo "Length of FROM and TO strings must be the same."
exit
fi
# Check to see if a directory is already in the ALL_DIRS array.
# If it is, that means we've already tried processing it, which
# means that we haven't created any deeper directories to work
# on. This means that there are no further links in possible
# ladders, and we've reached the end of our search without
# finding a complete ladder.
dir_already_processed () {
local DIR=$1
shift
local ALL_DIRS=("$@")
for X in "${ALL_DIRS[@]}"; do
[[ $X == $DIR ]] && return 0
done
return 1
}
# In each directory, we keep a list of all the other words that
# have had directories created elsewhere on the same level or in
# a more root-ward path. Since some other branch is exploring
# the chain from those words, we don't want to create a duplicate
# branch, as it would be redundant. This list of words allows us
# to skip creating those redundant branches.
#
# We need to know the actual words that are being processed else-
# where, not the whole path, so we strip off everything but the
# basename.
make_usedwords () {
find $HOME/$FROM -type d | xargs -n1 basename > usedwords
}
# Take a word, find all variations in the wordlist of Hamming
# distance 1, and create directories for all (except for those
# words that have directories already created at a level closer
# to the root.
make_subdirs () {
DIRECTORY=$1
# Replace a letter of the word with a period ("."), which
# will be used as a regex match for "any character". Do this
# for all the letters, and we have a set of regexes that will
# match all Hamming distance 1 words in the wordlist.
I=0
REGEXES=""
while [ $I -lt ${#DIRECTORY} ]; do
CAR=${DIRECTORY:0:$I}
CDR=${DIRECTORY:$I+1}
REGEXES="$REGEXES"" "$CAR.$CDR
let I=I+1
done
# Match the regexes against the wordlist.
for REGEX in $REGEXES; do
grep "^$REGEX$" $WORDLIST >> matches
done
# Create usedword files in this directory, so that we don't
# create any subdirectories with words already being worked
# on in some other directory branch.
make_usedwords
# Take all the matches, subtract the words that have been
# used at a higher directory level, and create subdirectories
# for each word.
grep -v -f usedwords matches | xargs -n1 mkdir -p
}
# Check to see if one or more directories exists with the
# target word.
check_for_success () {
if [ $(find $HOME/$FROM -type d -name $TO | wc -l) -eq 0 ]; then
SUCCESS=1
else
SUCCESS=0
fi
}
# Each time we run, we want to only process the deepest directories.
# Some branches may terminate earlier than others, if they exhaust
# possible variations. We only want to work on the currently active
# potential word ladders.
get_deepest_directories() {
# Get the length of the longest paths. Since all word variations
# are of equal length, path length is directly proportional to
# directory depth.
DIRLENGTH=$(find . -type d | awk '{if (length > x) {x = length}} END {print x}')
# Get all the directories that have the same longest path length.
find . -type d | awk -v dirlength=$DIRLENGTH '{if (length == dirlength) {print $0}}'
}
# Start the search. Create our root word, and basic setup.
mkdir -p $FROM
cd $FROM
make_usedwords
cd ..
# Initialize our success flag (initially set to false), and the
# array that will track all processed directories.
SUCCESS=1
ALL_DIRS=()
# Climb the ladder.
while [ $SUCCESS -ne 0 ]; do
for DIR in $(get_deepest_directories); do
# If we've already processed this directory, it means
# we've made no deeper directories, which means we've
# exhausted possible options, and we're done.
if dir_already_processed "$DIR" "${ALL_DIRS[@]}"; then
echo "No path from $FROM to $TO."
exit 1
# Otherwise, keep on processing.
else
cd $DIR
make_subdirs $(basename $DIR)
cd $HOME
ALL_DIRS+=($DIR)
fi
done
check_for_success
done
# Print out all matching paths, and exit with success.
find $HOME/$FROM -type d -name $TO | sed s#$HOME##
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment