# GaretJax/search.py

## search.py
import sys
import re


def iterwords(fh):
    """Yield ``(line_number, word)`` for every word found in *fh*.

    *fh* is any iterable of text lines (an open file, a list of strings).
    Line numbers are zero-based, as produced by ``enumerate``. Words are
    separated by runs of whitespace and are lower-cased; blank lines
    contribute no words at all.
    """
    for number, line in enumerate(fh):
        # str.split() without arguments ignores leading/trailing whitespace
        # and never returns empty strings, so a blank line yields nothing
        # instead of a bogus empty "word".
        for word in line.split():
            # Extra normalisation (for example stripping punctuation with a
            # precompiled regex) would go here, before the word is yielded.
            yield number, word.lower()


def search(fh, query):
    """Find every occurrence of the phrase *query* in *fh*.

    The phrase is matched word by word, case-insensitively, and may run
    across line breaks (including blank lines). Overlapping occurrences
    are all reported.

    Returns a tuple of ``(line, lines_count)`` pairs, in order of
    appearance: ``line`` is the zero-based line on which the match starts
    and ``lines_count`` is the number of lines from the first matched word
    to the last one, inclusive. An empty or whitespace-only query matches
    nothing and returns an empty tuple.
    """
    keywords = query.lower().split()
    if not keywords:
        return ()

    size = len(keywords)
    window = []  # the most recent `size` (line_number, word) pairs
    matches = []

    # A sliding window looks at every word exactly once and never reads
    # past the last keyword, so a match at the very end of the input is
    # found and a failed partial match cannot swallow the start of a
    # later one.
    for entry in iterwords(fh):
        window.append(entry)
        if len(window) > size:
            del window[0]
        if [word for _, word in window] == keywords:
            first_line = window[0][0]
            last_line = window[-1][0]
            matches.append((first_line, last_line - first_line + 1))

    return tuple(matches)


if __name__ == '__main__':
    # Usage: search.py QUERY FILE
    if len(sys.argv) < 3:
        sys.exit("usage: {0} QUERY FILE".format(sys.argv[0]))

    query = sys.argv[1]

    # The context manager closes the file even if searching or printing
    # raises.
    with open(sys.argv[2]) as fh:
        matches = search(fh, query)

        # From here on it's only presentation
        fh.seek(0)
        lines = enumerate(fh)

        # Lines already pulled from `lines` that a later match may still
        # need: several matches can start on the same line or overlap, so
        # a line must not be thrown away as soon as it has been printed.
        buffered = []
        for lineno, linecount in matches:
            last = lineno + linecount - 1

            # Matches arrive ordered by starting line, so anything before
            # this match's first line is no longer needed.
            buffered = [item for item in buffered if item[0] >= lineno]
            while not buffered or buffered[-1][0] < last:
                item = next(lines, None)
                if item is None:
                    # The file ended before the expected last line.
                    break
                if item[0] >= lineno:
                    buffered.append(item)

            result_lines = [text for number, text in buffered
                            if number <= last]

            # Parenthesised so the line is valid on Python 2 and Python 3.
            print("Match found on line {0} (spawning {1} lines):\n > {2}".format(
                lineno, linecount, ' > '.join(result_lines).strip()))
	import sys
	import re


	def iterwords(fh):
	    """Yield ``(line_number, word)`` for every word found in *fh*.

	    *fh* is any iterable of text lines (an open file, a list of strings).
	    Line numbers are zero-based, as produced by ``enumerate``. Words are
	    separated by runs of whitespace and are lower-cased; blank lines
	    contribute no words at all.
	    """
	    for number, line in enumerate(fh):
	        # str.split() without arguments ignores leading/trailing
	        # whitespace and never returns empty strings, so a blank line
	        # yields nothing instead of a bogus empty "word".
	        for word in line.split():
	            # Extra normalisation (for example stripping punctuation with
	            # a precompiled regex) would go here, before the word is
	            # yielded.
	            yield number, word.lower()


	def search(fh, query):
	    """Find every occurrence of the phrase *query* in *fh*.

	    The phrase is matched word by word, case-insensitively, and may run
	    across line breaks (including blank lines). Overlapping occurrences
	    are all reported.

	    Returns a tuple of ``(line, lines_count)`` pairs, in order of
	    appearance: ``line`` is the zero-based line on which the match starts
	    and ``lines_count`` is the number of lines from the first matched
	    word to the last one, inclusive. An empty or whitespace-only query
	    matches nothing and returns an empty tuple.
	    """
	    keywords = query.lower().split()
	    if not keywords:
	        return ()

	    size = len(keywords)
	    window = []  # the most recent `size` (line_number, word) pairs
	    matches = []

	    # A sliding window looks at every word exactly once and never reads
	    # past the last keyword, so a match at the very end of the input is
	    # found and a failed partial match cannot swallow the start of a
	    # later one.
	    for entry in iterwords(fh):
	        window.append(entry)
	        if len(window) > size:
	            del window[0]
	        if [word for _, word in window] == keywords:
	            first_line = window[0][0]
	            last_line = window[-1][0]
	            matches.append((first_line, last_line - first_line + 1))

	    return tuple(matches)



	if __name__ == '__main__':
	    # Usage: search.py QUERY FILE
	    if len(sys.argv) < 3:
	        sys.exit("usage: {0} QUERY FILE".format(sys.argv[0]))

	    query = sys.argv[1]

	    # The context manager closes the file even if searching or printing
	    # raises.
	    with open(sys.argv[2]) as fh:
	        matches = search(fh, query)

	        # From here on it's only presentation
	        fh.seek(0)
	        lines = enumerate(fh)

	        # Lines already pulled from `lines` that a later match may still
	        # need: several matches can start on the same line or overlap,
	        # so a line must not be thrown away as soon as it is printed.
	        buffered = []
	        for lineno, linecount in matches:
	            last = lineno + linecount - 1

	            # Matches arrive ordered by starting line, so anything
	            # before this match's first line is no longer needed.
	            buffered = [item for item in buffered if item[0] >= lineno]
	            while not buffered or buffered[-1][0] < last:
	                item = next(lines, None)
	                if item is None:
	                    # The file ended before the expected last line.
	                    break
	                if item[0] >= lineno:
	                    buffered.append(item)

	            result_lines = [text for number, text in buffered
	                            if number <= last]

	            # Parenthesised so the line is valid on Python 2 and 3.
	            print("Match found on line {0} (spawning {1} lines):\n > {2}".format(
	                lineno, linecount, ' > '.join(result_lines).strip()))