Last active
March 1, 2016 15:09
-
-
Save yuuki0xff/1e83bb4dc85a460e2cf8 to your computer and use it in GitHub Desktop.
lookコマンドのpythonによる実装
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
class Searcher: | |
""" | |
こちらの実装を参考にしました | |
http://www.logarithmic.net/pfh/blog/01186620415 | |
""" | |
def __init__(self, filename): | |
self.f = open(filename, 'rb') | |
self.f.seek(0,2) | |
self.length = self.f.tell() | |
def find(self, string): | |
result = [ ] | |
low = 0 | |
high = self.length | |
lastPos = 0 | |
# 1行目をcheck | |
self.f.seek(0) | |
line = self.f.readline() | |
if not line.startswith(string): | |
# 2行目以降の初めて該当する行、またはその直前の行を二分探索する | |
while low < high: | |
mid = (low+high)//2 | |
self.f.seek(mid) | |
self.f.readline() # 途中の行は読み捨てる | |
line = self.f.readline() | |
if lastPos == self.f.tell(): | |
break | |
lastPos = self.f.tell() | |
if line < string: | |
low = max(0, self.f.tell() - 2) # 改行文字の手前まで移動 | |
else: | |
high = mid | |
# 一致する全ての行をresultに追加 | |
self.f.seek(max(0, lastPos - len(line))) | |
while True: | |
line = self.f.readline() | |
if not line: break | |
if not line.startswith(string) and line > string: break | |
if line < string: continue | |
result.append(line.rstrip(b'\r\n')) | |
return result | |
def main(): | |
searchString = sys.argv[1].encode('utf8') | |
sortedFile = '/usr/share/dict/words' | |
if len(sys.argv) == 3: | |
sortedFile = sys.argv[2] | |
lookup = Searcher(sortedFile) | |
for line in lookup.find(searchString): | |
sys.stdout.buffer.write(line) | |
sys.stdout.buffer.write(b'\n') | |
return 0 | |
if __name__ == '__main__': | |
exit(main()) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment