Created
June 4, 2015 21:27
-
-
Save gabriel/d3cc454c58cc902e086b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Usage: | |
# tree.rb -l <low-value> -h <hi-value> -f <file> | |
# Function: | |
# Read in the file, one value per line. Then output all words between | |
# [<low-value>,<hi-value>] inclusive to standard output, in sorted order. | |
# This is a test-of-concept for a server function that will preload a binary-tree | |
# and then serve queries in O(log(n) + m) time, where n is the number of | |
# items in the input file, and m is the number of items in the output set. | |
# Shortcut: | |
# It would be more correct to use a red-black tree, or a treap, or a splay-tree, | |
# to guarantee balance, but we're going to use a hack: shuffle the input file randomly | |
# and insert in that order. We'd need to be really unlucky to get degenerate behavior. | |
# | |
require 'optparse' | |
# | |
# Shuffle array randomly | |
# | |
def shuffle(array) | |
rands = array.collect { |w| [w, rand] } | |
rands.sort_by { |r| r[0] }.collect { |r| r[0] } | |
end | |
# | |
# A Node has three fields: a value, a left pointer (to another node) and | |
# a right pointer (to a right node). | |
# | |
class Node | |
attr_reader :value | |
def initialize(value) | |
@value = value | |
end | |
# | |
# Insert the value into this subtree. If less than our | |
# root's value, insert into the left subtree. Otherwise, insert into | |
# the right subtree. | |
# | |
def insert(value) | |
if value < @value | |
if @left.nil? | |
@left = Node.new(value) | |
else | |
@left.insert(value) | |
end | |
else | |
if @right.nil? | |
@right = Node.new(value) | |
else | |
@right.insert(value) | |
end | |
end | |
end | |
# | |
# Find all items between [low,hi] in this subtree. Push them onto | |
# output in sorted order. | |
# | |
def find_range(low, hi, output) | |
in_range_left = !(@value < low) | |
in_range_right = !(hi < @value) | |
if !@left.nil? and in_range_left | |
@left.find_range(low, hi, output) | |
end | |
if in_range_left and in_range_right | |
output << @value | |
end | |
if !@right.nil? and in_range_left | |
@right.find_range(low, hi, output) | |
end | |
end | |
end | |
# | |
# The tree class is what we build up out of nodes. We only need to set the | |
# root node here. | |
# | |
class Tree | |
# | |
# Build a new tree by reading in all of the file, line-by-line, | |
# shuffling the data randomly, and then adding the data item-by-item | |
# to a new tree. | |
# | |
def load(filename) | |
words = [] | |
File.open(filename, "r") do |f| | |
f.each_line do |word| | |
words << word.strip | |
end | |
end | |
# Shuffle the data randomly, so we don't wind up with a degenerate tree | |
# unless we got really unlucky! | |
shuffle(words) | |
# Pick an element to be the root; doesn't matter front or back | |
@root = Node.new(words.shift) unless @root | |
# Insert each item one-by-one | |
words.each do |word| | |
@root.insert(word) | |
end | |
end | |
# | |
# Output an array of all items that are between [low,hi], inclusive. Do so | |
# in sorted order. | |
# | |
def find_range(low, hi) | |
output = [] | |
@root.find_range(low, hi, output) | |
output | |
end | |
end | |
options = {} | |
opt_parser = OptionParser.new do |opts| | |
opts.banner = "Usage: tree.rb [options]" | |
opts.on("-l", "--low WORD", "Low word") do |low| | |
options[:low] = low | |
end | |
opts.on("-h", "--hi WORD", "High word") do |hi| | |
options[:hi] = hi | |
end | |
opts.on("-f", "--file FILE", "File") do |file| | |
options[:file] = file | |
end | |
opts.on_tail("-h", "--help", "Show this message") do | |
puts opts | |
exit | |
end | |
end | |
opt_parser.parse! | |
fail "Low missing" if options[:low].nil? | |
fail "Hi missing" if options[:hi].nil? | |
fail "File missing" if options[:file].nil? | |
tree = Tree.new | |
tree.load(options[:file]) | |
output = tree.find_range(options[:low], options[:hi]) | |
puts output | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment