Skip to content

Instantly share code, notes, and snippets.

@n1chre
Last active June 28, 2018 16:51
Show Gist options
  • Save n1chre/e5ab31523463b11c0e9c93d4dc29ee27 to your computer and use it in GitHub Desktop.
Save n1chre/e5ab31523463b11c0e9c93d4dc29ee27 to your computer and use it in GitHub Desktop.
Binary search a line in a file having all lines formatted the same way
from __future__ import print_function
from time import time
class BSFile(object):
def __init__(self, filename, key_func):
self.filename = filename
self.key_func = key_func
def find(self, key):
'''
Get file iterator from first line for which key(line) >= <given_key>
Catch: it will never start from first line
'''
f = open(self.filename, 'r')
f.seek(0, 2) # seek to end
lo, mid, hi = 0, 0, f.tell()-1
f.seek(0) # return back to start
while lo < hi:
mid = (lo+hi) // 2
f.seek(mid)
if mid != 0:
if not f.readline(): # just to be sure not to consume half line
return f # EOF
mid = f.tell()
if mid >= hi:
lo = mid
else:
line = f.readline()
if not line:
return f # EOF
if key > self.key_func(line):
lo = mid
else:
hi = mid - 1
f.seek(mid)
return f
if __name__ == '__main__':
bsf = BSFile('test.txt', lambda line: int(line.split()[0]))
for i in range(20):
print('*** KEY: %02d ***' % i)
with bsf.find(i) as reader:
for line in reader:
print(line.strip())
print()
*** KEY: 00 ***
1 2
2 3
3 4
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 01 ***
1 2
2 3
3 4
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 02 ***
2 3
3 4
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 03 ***
3 4
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 04 ***
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 05 ***
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 06 ***
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 07 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 08 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 09 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 10 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 11 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 12 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 13 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 14 ***
14 12324
14 1324
14 34324234
17 asdfsad
*** KEY: 15 ***
17 asdfsad
*** KEY: 16 ***
17 asdfsad
*** KEY: 17 ***
17 asdfsad
*** KEY: 18 ***
*** KEY: 19 ***
1 1
1 2
2 3
3 4
5 5
5 6
6 7
9 8
14 123123
14 12324
14 1324
14 34324234
17 asdfsad
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment