Created
October 18, 2013 03:32
-
-
Save ssimeonov/7036107 to your computer and use it in GitHub Desktop.
Choice sequence lexicographic position finder designed to work with any choice set. Optimized to remove tail recursion and eliminate intermediate data structures by using a continually mutating state set. Benchmark shows an operation taking 40 microseconds. The tail recursion version that builds intermediate data structures is 5x slower at 200+ …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ChoiceSequence | |
MAX_CHOICES = 26 | |
MAX_SEQUENCE_LENGTH = 25 | |
FACTORIAL = (1..MAX_SEQUENCE_LENGTH).reduce([1]) do |memo, n| | |
memo << memo.last * n | |
end.freeze | |
def self.position(seq) | |
length = seq.length | |
max_permutations = FACTORIAL[length] | |
count_less_than = Array.new(MAX_CHOICES, 0) | |
running_count_less_than = 0 | |
count = Array.new(MAX_CHOICES, 0) | |
seq.each { |elem| count[elem] += 1 } | |
count.each_with_index do |elem_count, i| | |
max_permutations = max_permutations / FACTORIAL[elem_count] if elem_count > 0 | |
count_less_than[i] = running_count_less_than | |
running_count_less_than += elem_count | |
end | |
pos = 0 | |
seq.each do |elem| | |
if length == 1 | |
pos += 1 | |
else | |
pos += max_permutations / length * count_less_than[elem] | |
max_permutations = max_permutations * count[elem] / length | |
length -= 1 | |
count[elem] -= 1 | |
((elem+1)...MAX_CHOICES).each { |x| count_less_than[x] -= 1 } | |
end | |
end | |
pos | |
end | |
end | |
class ChoiceSequenceTest | |
START_OFFSET = 'A'.ord | |
def self.test | |
puts pos('A') # 1 | |
puts pos('AA') # 1 | |
puts pos('AAAB') # 1 | |
puts pos('ABAB') # 2 | |
puts pos('BAAA') # 4 | |
puts pos('GOOGLE') # 88 | |
puts pos('QUESTION') # 24572 | |
puts pos('BOROVETZ') # 637 | |
puts pos('SIMEONOV') # 15544 | |
end | |
def self.pos(str) | |
ChoiceSequence.position(bytes(str.upcase)) | |
end | |
def self.bytes(str) | |
bytes = str.bytes | |
bytes.map! { |byte| byte - START_OFFSET } | |
bytes | |
end | |
end | |
class ChoiceSequenceBench | |
ALPHABET = ('A'..'Z').to_a | |
ALPHABET_LENGTH = ALPHABET.length | |
def self.bm(n = 1000) | |
Benchmark.bmbm do |bm| | |
strings = build_strings(n) | |
bm.report name do | |
strings.each do |str| | |
ChoiceSequenceTest.pos(str) | |
end | |
end | |
end | |
end | |
def self.build_strings(n) | |
Array.new(n).tap do |strings| | |
strings.each_index do |i| | |
strings[i] = build_string | |
end | |
end | |
end | |
def self.build_string | |
len = rand(ChoiceSequence::MAX_SEQUENCE_LENGTH - 1) + 2 | |
(0...len).map { ALPHABET[rand(ALPHABET_LENGTH)] }.join | |
end | |
end | |
ChoiceSequenceTest.test | |
ChoiceSequenceBench.bm(100000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment