Skip to content

Instantly share code, notes, and snippets.

@IronSavior
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save IronSavior/ba9d4738531aefde82d2 to your computer and use it in GitHub Desktop.
Save IronSavior/ba9d4738531aefde82d2 to your computer and use it in GitHub Desktop.
A collection class with built-in lookup tables.
# Author: Erik Elmore <erik@erikelmore.com>
# License: Public Domain
# A lazy-loaded collection with built-in lookup indexing.
# Good for using lots of memory.
class IndexedCollection
include Enumerable
DEFAULT_DATA = Array[].freeze
DEFAULT_LOADER = ->(){ DEFAULT_DATA }.freeze
# Create a new collection with no index.
# @parm Proc Loader--A proc that will retrieve the collection contents. Default loader creates an empty Array.
def initialize( &blk )
@loader = block_given?? blk : DEFAULT_LOADER
@indexers = Hash[]
@indexed = Hash[]
@data = Array[]
@loaded = false
end
# Replace the loader proc and clear data/index
def loader( &blk )
raise ArgumentError, 'Loader proc required but not given' unless block_given?
@loader = blk
clear
self
end
# Unload data and index
def clear
@loaded = false
@data.clear
@indexed.clear
self
end
# Clear data/index and execute loader proc
def reload
clear
@data.replace @loader.call.to_a
@loaded = true
@data
end
# Direct access to the collection, executing the loader proc if necessary.
def data
reload unless @loaded
@data
end
# Size of the collection. This can cause the loader to execute.
def size
data.size
end
# The customary enumerator method
def each
return data.to_enum unless block_given?
data.each{|i| yield i }
end
# Add a new indexer
def index_by( name, &blk )
raise ArgumentError, 'Must provide indexer proc.' unless block_given?
@indexers[name] = blk
@indexed.delete name
self
end
# Add simple attribute indexers
def index( *attrs )
attrs.each{|attr| index_by attr, &attr }
self
end
# True if self can index by the given name
def indexed_by?( name )
@indexers.key? name
end
# Build any outstanding indices.
def build_index
@indexers.reject{|name, *| @indexed.key? name }.each{ |name, indexer|
@indexed[name] = data.group_by &indexer
}
self
end
# Search an index, building it first, if necessary.
# If the index doesn't exist and a block is given, the index is created with the block as the indexer.
# @return Array objects in collection that match the given value
def find_by( name, value, &blk )
index_by name, &blk if !indexed_by?(name) && block_given?
raise ArgumentError, 'No block given and no index with name %s.' % name unless indexed_by? name
build_index if @indexed[name].nil?
@indexed[name][value] || []
end
# List of indexes servable by this collection
def indices
@indexers.keys
end
alias_method :indexes, :indices
# List of indexes that are already initialized
def complete_indices
@indexed.keys
end
alias_method :complete_indexes, :complete_indices
# List of indexes that are yet to be initialized
def incomplete_indices
indices - complete_indices
end
alias_method :incomplete_indexes, :incomplete_indices
# IRB doesn't need to tell us everything
def inspect
'#<%s indices=%s%s @loaded=%s @data.size=%d>' % Array[
self.class,
indices,
incomplete_indices.empty?? nil : ' (incomplete: %s)' % [incomplete_indices],
@loaded,
@data.size
]
end
end
if __FILE__ == $0
class ClassWithACache
attr_reader :a1
def initialize( v )
@a1 = v
end
class << self
def expensive_fetch
puts 'Expensive fetch operation!'
[*1..10_000_000].map{|v| new v }
end
def cache
@cache ||= IndexedCollection.new{ expensive_fetch }.index(:a1).index_by(:even){|i| i.a1.even? }
end
def even
cache.find_by :even, true
end
def odd
cache.find_by :even, false
end
def find_by_a1( v )
cache.find_by(:a1, v).first
end
end
end
require 'benchmark'
def collection
c = IndexedCollection.new(&->(){ [*1..10_000_000] })
c.index_by(:even, &:even?)
c.index_by(:fives){|i| i % 5 }
c.index_by(:threes){|i| i % 3 }
c.index_by(:hundreds){|i| i % 100 }
c
end
def bm
Benchmark.bm do |bm|
bm.report{
c = collection
c.find_by(:even, 0).size
c.find_by(:fives, 0).size
c.find_by(:threes, 0).size
c.find_by(:hundreds, 0).size
c.find_by(:thousands, 0){|i| i % 1000 }.size
}
bm.report{
c = collection
c.find_by(:even, 0).size
c.find_by(:fives, 0).size
c.find_by(:threes, 0).size
c.find_by(:hundreds, 0).size
c.find_by(:thousands, 0){|i| i % 1000 }.size
}
bm.report{
c = collection
c.find_by(:even, 0).size
c.find_by(:fives, 0).size
c.find_by(:threes, 0).size
c.find_by(:hundreds, 0).size
c.find_by(:thousands, 0){|i| i % 1000 }.size
}
bm.report{
c = collection
c.find_by(:even, 0).size
c.find_by(:fives, 0).size
c.find_by(:threes, 0).size
c.find_by(:hundreds, 0).size
c.find_by(:thousands, 0){|i| i % 1000 }.size
}
bm.report{
c = collection
c.find_by(:even, 0).size
c.find_by(:fives, 0).size
c.find_by(:threes, 0).size
c.find_by(:hundreds, 0).size
c.find_by(:thousands, 0){|i| i % 1000 }.size
}
end
end
bm
end # if __FILE__ == $0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment