Skip to content

Instantly share code, notes, and snippets.

@jtolio
Created August 12, 2012 04:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jtolio/3329792 to your computer and use it in GitHub Desktop.
Save jtolio/3329792 to your computer and use it in GitHub Desktop.
xor re-sort from a numerically sorted list
#!/usr/bin/env python
import time
import bisect
import random
class ArrayWrapper(object):
"""This class allows access to an array via binary search, while counting
the number of binary search calls.
"""
def __init__(self, array):
last = 0
max_val = 0
for x in array:
assert last <= x
last = x
if x > max_val: max_val = x
self.values = array
self.max_bits = max_val.bit_length()
self.finds = 0
def find(self, key):
"""Returns the first key found in the array rval such that key <= rval
or None if no such key exists.
"""
self.finds += 1
index = bisect.bisect_left(self.values, key)
if index == len(self.values): return None
return self.values[index]
def xorsort(array, target, limit=float('inf')):
"""Given a sorted array of integers, a target integer, and a limit, returns
up to limit integers sorted by xor-distance from target.
"""
max_bits = max(array.max_bits, target.bit_length())
results = []
def helper(start, end, bit_mask, val):
"""this helper function considers possible values that have xor
distance from the target between [start, end), where start and end
are xor distances. bit_mask tracks the bits we care about so far, and
val is passed if we already know a value exists in this range.
"""
if len(results) >= limit:
# we have enough results, let's quit
return
if val is None:
# we weren't given a seed value for this range, so see if a value
# exists
val = array.find((target ^ start) & bit_mask)
if val is None or val ^ target < start or end <= val ^ target:
# no value exists in this range. give up on this range
return
if end - start <= 1:
# this range can't contain more than just this value. we're done
results.append(val)
return
# make a new bit_mask that cares about one more bit, and split the
# range
new_bit_mask = (bit_mask >> 1) | (2 ** (max_bits - 1))
midpoint = start + (end - start) / 2
if val ^ target < midpoint:
# the val we looked up is on the left of the midpoint
helper(start, midpoint, new_bit_mask, val)
helper(midpoint, end, new_bit_mask, None)
else:
# it's on the right.
helper(start, midpoint, new_bit_mask, None)
helper(midpoint, end, new_bit_mask, val)
# kick it off with the full range of values
helper(0, 2 ** max_bits, 0, None)
return results
def main():
"""some testing"""
limit = 5
xorsort_timing = 0
standard_sort_timing = 0
for _ in xrange(10):
array = ArrayWrapper(list(sorted(set(random.randrange(1, 2**256)
for _ in xrange(100000)))))
target = random.randrange(1, 2**256)
xorsort_start = time.time()
xorsort_rv = xorsort(array, target, limit)
xorsort_time = time.time() - xorsort_start
xorsort_timing += xorsort_time
print "required %d binary searches for %d values in %f seconds" % (
array.finds, len(array.values), xorsort_time)
standard_sort_start = time.time()
standard_sort_rv = [val for _dist, val in sorted((val ^ target, val)
for val in array.values)]
standard_sort_timing += time.time() - standard_sort_start
assert xorsort_rv == standard_sort_rv[:limit]
print "total xorsort time:\t\t", xorsort_timing
print "total standard sort time:\t", standard_sort_timing
if __name__ == "__main__": main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment