-
-
Save yto/184bf3806d05f26cbd38171ec25cc3fb to your computer and use it in GitHub Desktop.
ソート済みテキストを二分探索する look コマンドの pure Python 実装
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# see https://chalow.net/2023-10-16-1.html | |
import sys | |
class Searcher: | |
def __init__(self, filename, buffer_size=10000): | |
self.f = open(filename, 'rb') | |
self.f.seek(0,2) | |
self.length = self.f.tell() | |
self.buffer_size = buffer_size | |
def go_to_line_top(self, p): | |
self.f.seek(p) | |
self.f.readline() | |
line_last = self.f.tell() | |
self.f.seek(max(0, p - self.buffer_size)) | |
while True: | |
line = self.f.readline() | |
if not line or line_last == self.f.tell(): | |
break | |
self.f.seek(line_last - len(line)) | |
return line | |
def find(self, string): | |
low = 0 | |
high = self.length | |
while low < high: | |
mid = (low+high)//2 | |
line = self.go_to_line_top(mid) | |
if line < string: | |
low = mid + 1 | |
else: | |
high = mid | |
self.go_to_line_top(low) | |
result = [] | |
while True: | |
line = self.f.readline() | |
if not line or not line.startswith(string): | |
break | |
result.append(line.rstrip(b'\r\n')) | |
return result | |
def main(): | |
sortedFile = sys.argv[1] | |
lookup = Searcher(sortedFile) | |
for searchString in sys.stdin: | |
searchString = searchString.rstrip().encode('utf8') | |
for line in lookup.find(searchString): | |
print(line.decode()) | |
return 0 | |
if __name__ == '__main__': | |
exit(main()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment