Skip to content

Instantly share code, notes, and snippets.

@qb20nh
Created February 4, 2022 17:14
Show Gist options
  • Save qb20nh/06ad21054205b62c01d2887ccf3377f3 to your computer and use it in GitHub Desktop.
Save qb20nh/06ad21054205b62c01d2887ccf3377f3 to your computer and use it in GitHub Desktop.
from time import sleep, time
import os
def binary_search(condition, low, high):
    """Search the inclusive integer range [low, high] with a three-way probe.

    Args:
        condition: Callable that receives a candidate index and returns 0
            when the candidate is the match, a negative number when the
            match lies below the candidate, and a positive number when the
            match lies above it.
        low: Smallest candidate index (inclusive).
        high: Largest candidate index (inclusive).

    Returns:
        The probed index for which ``condition`` returned 0, or ``None``
        when ``condition`` is not callable, the range is empty, or no
        candidate matched.
    """
    if not callable(condition):
        return None
    # ``<=`` rather than ``<``: a range narrowed down to a single candidate
    # must still be probed, otherwise some matches are never found.
    while low <= high:
        # Floor division stays exact for arbitrarily large integers; going
        # through float division loses precision above 2**53 and can yield
        # a midpoint outside [low, high].
        mid = (low + high) // 2
        res = condition(mid)
        if res == 0:
            return mid
        if res < 0:
            high = mid - 1
        else:
            low = mid + 1
    return None
def find_json_property(idx, target=2500000000, preview_bytes=1024):
    """Probe the data file at byte offset ``idx`` on behalf of binary_search.

    Reads a preview window from the module-level binary handle ``file``,
    prints it, simulates a slow database query, and reports where ``idx``
    lies relative to ``target``.

    Args:
        idx: Byte offset into the file to inspect.
        target: Byte offset treated as the location being searched for.
            Placeholder until a real JSON property match is wired in.
        preview_bytes: Number of bytes to read for the preview; also the
            tolerance within which ``idx`` counts as a hit.

    Returns:
        0 when ``idx`` is within ``preview_bytes`` of ``target``, -1 when
        ``idx`` is past the target (search lower), 1 otherwise (search
        higher).
    """
    import codecs  # local import keeps this block self-contained

    print(f'seeking to {idx}')
    file.seek(idx)
    raw = file.read(preview_bytes)

    # ``idx`` may land inside a multi-byte UTF-8 character. A character has
    # at most three continuation bytes (0b10xxxxxx), so skipping up to three
    # of them realigns the start without sliding the whole window.
    start = 0
    while start < min(3, len(raw)) and (raw[start] & 0xC0) == 0x80:
        start += 1

    # With final=False the incremental decoder holds back a character that
    # is cut off at the end of the window instead of raising, so the preview
    # never needs a retry loop. Invalid bytes become U+FFFD rather than
    # aborting the probe.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    read_content = decoder.decode(raw[start:], final=False)
    print(f'content: {read_content}')

    print('querying DB for property songID')  # query db here
    sleep(1)  # assume every query takes about 1 second

    if abs(idx - target) <= preview_bytes:  # change this to json property match result
        return 0
    if idx > target:
        return -1
    return 1
# Script driver: binary-search the data file by byte offset and time the run.
file_path = "C:/json/song_data_file.json"  # replace this with your file location
start_time = time()
size_in_bytes = os.path.getsize(file_path)
print(f'file size: {size_in_bytes}')
# ``file`` has to remain a module-level name because find_json_property
# reads from it; the ``with`` block guarantees the handle is closed even if
# the search raises.
with open(file_path, 'rb') as file:
    result = binary_search(find_json_property, 0, size_in_bytes)
end_time = time()
print(f'found at {result}')
print(f'elapsed {end_time-start_time}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment