Created
August 12, 2012 04:45
-
-
Save jtolio/3329792 to your computer and use it in GitHub Desktop.
xor re-sort from a numerically sorted list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import time | |
import bisect | |
import random | |
class ArrayWrapper(object): | |
"""This class allows access to an array via binary search, while counting | |
the number of binary search calls. | |
""" | |
def __init__(self, array): | |
last = 0 | |
max_val = 0 | |
for x in array: | |
assert last <= x | |
last = x | |
if x > max_val: max_val = x | |
self.values = array | |
self.max_bits = max_val.bit_length() | |
self.finds = 0 | |
def find(self, key): | |
"""Returns the first key found in the array rval such that key <= rval | |
or None if no such key exists. | |
""" | |
self.finds += 1 | |
index = bisect.bisect_left(self.values, key) | |
if index == len(self.values): return None | |
return self.values[index] | |
def xorsort(array, target, limit=float('inf')): | |
"""Given a sorted array of integers, a target integer, and a limit, returns | |
up to limit integers sorted by xor-distance from target. | |
""" | |
max_bits = max(array.max_bits, target.bit_length()) | |
results = [] | |
def helper(start, end, bit_mask, val): | |
"""this helper function considers possible values that have xor | |
distance from the target between [start, end), where start and end | |
are xor distances. bit_mask tracks the bits we care about so far, and | |
val is passed if we already know a value exists in this range. | |
""" | |
if len(results) >= limit: | |
# we have enough results, let's quit | |
return | |
if val is None: | |
# we weren't given a seed value for this range, so see if a value | |
# exists | |
val = array.find((target ^ start) & bit_mask) | |
if val is None or val ^ target < start or end <= val ^ target: | |
# no value exists in this range. give up on this range | |
return | |
if end - start <= 1: | |
# this range can't contain more than just this value. we're done | |
results.append(val) | |
return | |
# make a new bit_mask that cares about one more bit, and split the | |
# range | |
new_bit_mask = (bit_mask >> 1) | (2 ** (max_bits - 1)) | |
midpoint = start + (end - start) / 2 | |
if val ^ target < midpoint: | |
# the val we looked up is on the left of the midpoint | |
helper(start, midpoint, new_bit_mask, val) | |
helper(midpoint, end, new_bit_mask, None) | |
else: | |
# it's on the right. | |
helper(start, midpoint, new_bit_mask, None) | |
helper(midpoint, end, new_bit_mask, val) | |
# kick it off with the full range of values | |
helper(0, 2 ** max_bits, 0, None) | |
return results | |
def main(): | |
"""some testing""" | |
limit = 5 | |
xorsort_timing = 0 | |
standard_sort_timing = 0 | |
for _ in xrange(10): | |
array = ArrayWrapper(list(sorted(set(random.randrange(1, 2**256) | |
for _ in xrange(100000))))) | |
target = random.randrange(1, 2**256) | |
xorsort_start = time.time() | |
xorsort_rv = xorsort(array, target, limit) | |
xorsort_time = time.time() - xorsort_start | |
xorsort_timing += xorsort_time | |
print "required %d binary searches for %d values in %f seconds" % ( | |
array.finds, len(array.values), xorsort_time) | |
standard_sort_start = time.time() | |
standard_sort_rv = [val for _dist, val in sorted((val ^ target, val) | |
for val in array.values)] | |
standard_sort_timing += time.time() - standard_sort_start | |
assert xorsort_rv == standard_sort_rv[:limit] | |
print "total xorsort time:\t\t", xorsort_timing | |
print "total standard sort time:\t", standard_sort_timing | |
if __name__ == "__main__": main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment