Skip to content

Instantly share code, notes, and snippets.

@matchaxnb
Created December 19, 2023 22:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matchaxnb/3c2a7b5b1828d1ba1a43811a87babe20 to your computer and use it in GitHub Desktop.
Save matchaxnb/3c2a7b5b1828d1ba1a43811a87babe20 to your computer and use it in GitHub Desktop.
binary search stuff, for fun
"""we play with large sorted integer arrays"""
import timeit
from typing import Union, Tuple
import random
ar_a = list(range(0, 10))
ar_b = list(range(-5, 5, 1))
ar_c = list(range(-100, 100))
ar_d = list(range(90, 101))
ar_solution = [0, 1, 2, 3, 4]
def intersect_trivial(a: list, b: list):
"""use the python API"""
return list(set(a).intersection(set(b)))
def intersect(a: list, b: list):
"""intersection alg for sorted int arrays
for each element of a:
compare it to each element of slice[start:] of b:
if it matches
start <- index in b
start over with next element
"""
met_bound = 0
res = []
for el in a:
start = 0
for i, el2 in enumerate(b[start:]):
if el2 == el:
start = i
res.append(el)
break
elif el2 > el:
start = i
break
return res
reports_depth = []
def lookup(needle: int, haystack: Union[range, list], ex=0):
"""binary partition for lookup, very naive algorithm that will copy slices around and thus be memory-intensive"""
lh = len(haystack)
mid = haystack[int(lh / 2)]
if lh == 1:
reports_depth.append(ex)
return haystack[0] == needle
elif mid > needle:
return lookup(needle, haystack[0:int(lh / 2)], ex + 1)
elif mid < needle:
return lookup(needle, haystack[int(lh / 2):], ex + 1)
else:
# found
reports_depth.append(ex)
return True
def lookup_opti(needle: int, haystack: Union[range, list], start=0, end=None, ex=0, path=None):
"""binary partition that does not copy the haystack but rather just passes around indices, which should be a bit nicer on memory"""
if end is None:
end = len(haystack)
lh = end - start
mid = haystack[int(lh / 2) + start]
if lh == 1:
reports_depth.append(ex)
return haystack[0] == needle
elif mid > needle:
# take first half - same start but different end
return lookup_opti(needle, haystack, start, start + int(lh / 2), ex + 1)
elif mid < needle:
# take second half - same end but different start
return lookup_opti(needle, haystack, start + int(lh / 2), end, ex + 1)
else:
# found
reports_depth.append(ex)
return True
stk = range(0, 20_000_000, 2) # using range allows to have a shallow symbolic structure that does not take up memory and doesn't need to be passed by value (python slices are slice-by-value)
lstk = list(stk)
print(len(stk))
def lookup_giga_opti(needle: int, haystack: Union[list, range], pre_check_size=1000) -> bool:
"""use a gross lookup of where a value could be before with specified granularity before doing a native test of existence in a dumb, linear way"""
start_bound, end_bound = gross_lookup_opti(needle, haystack, pre_check_size)
to_chk = haystack[start_bound:end_bound]
# return needle in to_chk # let's not cheat lol
for elem in to_chk:
if needle == elem:
return True
elif elem > needle:
return False
return False
def gross_lookup_opti(needle: int, haystack: Union[list, range], set_size=1000) -> Tuple[int, int]:
"""do a gross lookup of where an element could be in a sorted haystack and return its best guess, with a given size parameter"""
s_size = len(haystack)
start_point = 0
mid_point = start_point + int(s_size / 2)
while s_size > set_size:
midval = haystack[mid_point]
# update the set size
s_size = mid_point - start_point
if midval >= needle:
# in case the middle value is more than our value we know
# our value may only be on the left side of the set
# so let's set mid_point to the middle of the left side
# and check again
mid_point = start_point + int(s_size / 2)
continue
elif midval < needle:
# then we know the needle may only be on the right side of the
# set. we shift the start point to the mid point and offset the
# mid point to the center of the remainder
start_point = mid_point
mid_point = start_point + int(s_size / 2)
continue
return (start_point, mid_point)
def do_test():
"""this proves that my code outperforms naive 'in'
my optimized code is faster on values, and has consistent time for matches and non-matches because it will do just anything between 20 and 24 executions for any result (bounded by n so that 2**(n-1) < len(haystack) and 2**n > len(haystack))
"""
print("my code")
print(timeit.timeit(lambda: lookup(50051, stk), number=10000))
print(timeit.timeit(lambda: lookup(50000, stk), number=10000))
print(timeit.timeit(lambda: lookup(500000, stk), number=10000))
print(timeit.timeit(lambda: lookup(999999900000, stk), number=10000))
print("my opti code")
print(timeit.timeit(lambda: lookup_opti(50051, stk), number=10000))
print(timeit.timeit(lambda: lookup_opti(50000, stk), number=10000))
print(timeit.timeit(lambda: lookup_opti(500000, stk), number=10000))
print(timeit.timeit(lambda: lookup_opti(999999900000, stk), number=10000))
print("my opti code on values")
print(timeit.timeit(lambda: lookup_opti(50051, lstk), number=1000))
print(timeit.timeit(lambda: lookup_opti(50000, lstk), number=1000))
print(timeit.timeit(lambda: lookup_opti(500000, lstk), number=1000))
print(timeit.timeit(lambda: lookup_opti(999999900000, lstk), number=1000))
print("native on range")
print(timeit.timeit(lambda: 50051 in stk, number=10000))
print(timeit.timeit(lambda: 50000 in stk, number=10000))
print(timeit.timeit(lambda: 500000 in stk, number=10000))
print(timeit.timeit(lambda: 999999900000 in stk, number=10000))
print("native on values")
print(timeit.timeit(lambda: 50051 in lstk, number=1000))
print(timeit.timeit(lambda: 50000 in lstk, number=1000))
print(timeit.timeit(lambda: 500000 in lstk, number=1000))
print("just doing that dastard non-match 10 times")
print(timeit.timeit(lambda: 999999900000 in lstk, number=10))
print(min(reports_depth), max(reports_depth))
print(do_test.__doc__)
def ri(a, b):
return random.randint(a, b)
def do_smaller_test():
num_to_test = ri(100, 9_999_999)
print(f"testing number {num_to_test} and random neighbors...")
print("basic binpart lookup")
print(timeit.timeit(lambda: lookup(num_to_test + ri(-100, 100), stk), number=10000))
print("mere binpart opti")
print(timeit.timeit(lambda: lookup_opti(num_to_test + ri(-100, 100), stk), number=10000))
print("giga binpart opti with various linear lookup sizes")
for i in (1, 2, 3, 6, 12, 25, 50, 100, 200, 400, 800, 1600):
print(f"\t{i}")
print(timeit.timeit(lambda: lookup_giga_opti(num_to_test + ri(-100, 100), stk, i), number=10000))
#do_test()
do_smaller_test()

execution example

testing number 4192140 and random neighbors...
basic binpart lookup
0.24330062000080943
mere binpart opti
0.16545362501346972
giga binpart opti with various linear lookup sizes
    1
0.10087132500484586
    2
0.09616238399758004
    3
0.10445303999586031
    6
0.15915612000389956
    12
0.15437415400811005
    25
0.10395679700013716
    50
0.09354111501306761    
    100
0.0990587440028321    
    200
0.12257004099956248    
    400
0.16662622400326654
    800
0.1676528810057789
    1600
0.16182698900229298
(finished in 3.73s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment