Skip to content

Instantly share code, notes, and snippets.

@DM-
Forked from darkf/build_index.py
Created April 24, 2012 03:02
Show Gist options
  • Select an option

  • Save DM-/2475861 to your computer and use it in GitHub Desktop.

Select an option

Save DM-/2475861 to your computer and use it in GitHub Desktop.
Very basic text indexer/searcher in Python
import sys, os, glob, re, pickle
# Indexer entry point: expects exactly one argument, the directory to index.
if len(sys.argv) != 2:
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("USAGE: %s DIR" % sys.argv[0])
    sys.exit(1)

DIR = sys.argv[1]
INDEX = {}  # word -> list of (file, lineno) tuples, filled by the indexing loop
FILES = []  # joined path of every file name os.walk reports under DIR

# os.walk yields (dirpath, dirnames, filenames); only the file names are
# needed, each joined onto the directory that contains it.
for dirpath, _, filenames in os.walk(DIR):
    FILES.extend(os.path.join(dirpath, name) for name in filenames)
def tokenizeLine(s):
    """Break a line of text into lower-cased words.

    The line is split on whitespace and on the characters ' " . , ! ? ; :
    and the empty fragments produced by adjacent separators are discarded.

    Returns a list of the remaining fragments, each lower-cased, in the
    order they appear in ``s``.
    """
    words = []
    for fragment in re.split(r"[\s'\".,!?;:]", s):
        if fragment == "":
            # Two separators in a row (or one at either end) yield "".
            continue
        words.append(fragment.lower())
    return words
# Build the inverted index: every token maps to the list of
# (file path, 1-based line number) places where it occurs.
for filename in FILES:
    with open(filename, "r") as f:
        # Number the *lines* of the file. Enumerating the tokens of a line
        # instead would record each word's position within its line rather
        # than the line it is on.
        for lineno, line in enumerate(f, 1):
            for word in tokenizeLine(line):
                INDEX.setdefault(word, []).append((filename, lineno))

# Persist the index in the current working directory for the search script.
with open("index.p", "wb") as f:
    pickle.dump(INDEX, f)
import sys, pickle, os
# Search entry point: expects exactly one argument, the word to look up.
if len(sys.argv) != 2:
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("USAGE: %s WORD" % sys.argv[0])
    sys.exit(1)

# The query is lower-cased before it is used as the lookup key.
WORD = sys.argv[1].lower()
INDEX = None  # replaced by the unpickled index once index.p has been read
def places(l):
    """Format index hits as a per-file listing of line numbers.

    Args:
        l: iterable of ``(file, lineno)`` pairs; the same pair may appear
            more than once.

    Returns:
        A string made of one ``"<file>:"`` header line per distinct file,
        each followed by one ``" line <n>"`` line per distinct line number
        in ascending order. An empty input gives an empty string.
    """
    # Group line numbers by file; a set drops repeated hits on one line.
    lines_by_file = {}
    for path, lineno in l:
        lines_by_file.setdefault(path, set()).add(lineno)

    # Collect the pieces and join once rather than growing a string.
    parts = []
    for path in lines_by_file:
        parts.append(path)
        parts.append(":\n")
        for lineno in sorted(lines_by_file[path]):
            parts.append(" line " + str(lineno) + "\n")
    return "".join(parts)
# NOTE: pickle.load can run arbitrary code while unpickling. Only load an
# index.p that was written by the indexing script, never an untrusted file.
with open("index.p", "rb") as f:
    INDEX = pickle.load(f)

# Use .get() rather than INDEX[WORD]: a word that was never indexed has no
# key, and subscripting would raise KeyError before the "not found" message
# could be printed.
hits = INDEX.get(WORD)
if not hits:
    print("Word '%s' not found" % WORD)
    sys.exit(1)

print("Word '%s' found in %s" % (WORD, places(hits)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment