Skip to content

Instantly share code, notes, and snippets.

@GaretJax
Created July 30, 2011 04:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save GaretJax/1115208 to your computer and use it in GitHub Desktop.
Save GaretJax/1115208 to your computer and use it in GitHub Desktop.
import collections
import re
import sys
def iterwords(fh):
    """Yield ``(line_number, word)`` for every word in an iterable of lines.

    Lines are numbered from 0 in iteration order. Words are the
    whitespace-separated tokens of each line, lower-cased. Blank or
    whitespace-only lines contribute no words.

    :param fh: any iterable of text lines (an open file, a list of strings).
    """
    for number, line in enumerate(fh):
        # str.split() with no argument splits on runs of whitespace and never
        # produces empty tokens; splitting an empty line with a regex returns
        # [''], which would surface a bogus empty "word" for each blank line.
        for word in line.split():
            # Hook for extra normalisation: e.g. strip punctuation here with
            # a precompiled regex before lower-casing.
            yield number, word.lower()


def search(fh, query):
    """Find every occurrence of the phrase *query* in the lines of *fh*.

    The query is split on whitespace and compared case-insensitively against
    consecutive words of the input; a phrase may continue across line breaks
    (including blank lines). Overlapping and adjacent occurrences are all
    reported.

    :param fh: any iterable of text lines (an open file, a list of strings).
    :param query: the phrase to look for.
    :returns: a tuple of ``(line, lines_count)`` pairs, one per occurrence,
        in order of appearance. ``line`` is the 0-based number of the line
        holding the first word of the match and ``lines_count`` is the number
        of consecutive lines, starting there, needed to cover the whole
        match. An empty query yields an empty tuple.
    """
    keywords = query.lower().split()
    if not keywords:
        return ()

    # Sliding window over the last len(keywords) words. Every word is a
    # candidate match start, and nothing is read past the end of a match, so
    # a hit on the very last words of the input is handled like any other.
    window = collections.deque(maxlen=len(keywords))
    matches = []
    for entry in iterwords(fh):
        window.append(entry)
        if len(window) < len(keywords):
            continue
        if all(word == keyword
               for (_, word), keyword in zip(window, keywords)):
            first_line = window[0][0]
            last_line = window[-1][0]
            matches.append((first_line, last_line - first_line + 1))
    return tuple(matches)
def format_match(lines, lineno, linecount):
    """Render one search hit as the text printed for it.

    :param lines: list of the file's lines, each keeping its line ending.
    :param lineno: 0-based index of the first line of the match.
    :param linecount: number of consecutive lines the match covers.
    :returns: a header naming the line and span, followed by the covered
        lines, each prefixed with `` > ``.
    """
    # Slicing a list (rather than advancing a shared iterator) keeps the
    # output correct when several matches start on, or overlap, the same line.
    excerpt = ' > '.join(lines[lineno:lineno + linecount]).strip()
    return "Match found on line {0} (spanning {1} lines):\n > {2}".format(
        lineno, linecount, excerpt)


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit("usage: {0} QUERY FILE".format(sys.argv[0]))
    query = sys.argv[1]
    # Read the file once and close it promptly; the same list of lines feeds
    # both the search and the presentation below.
    with open(sys.argv[2]) as fh:
        lines = fh.readlines()
    # From here on it's only presentation.
    for lineno, linecount in search(lines, query):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(format_match(lines, lineno, linecount))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment