
@correl
Created March 28, 2014 14:58
Aggregate similar log file entries matching a search pattern
import sys
import argparse
from itertools import ifilter
from fuzzywuzzy import fuzz

MATCH_THRESHOLD = 90


def parse_file(filename, predicate=None):
    """Open a log file and aggregate its (optionally filtered) lines."""
    with open(filename) as f:
        return aggregate_errors(f, predicate)


def aggregate_errors(lines, predicate=None):
    """Fold lines into a dict keyed by the first line seen for each group."""
    return reduce(store_similar,
                  ifilter(predicate, lines) if predicate else lines,
                  {})


def store_similar(acc, string):
    """Add the line to its closest existing group, or start a new group."""
    match = find_similar(acc, string)
    if match:
        acc[match["key"]].append((match["ratio"], string))
    else:
        acc[string] = [(100, string)]
    return acc


def find_similar(acc, string):
    """Return the best-scoring existing key at or above MATCH_THRESHOLD."""
    results = map(lambda s: (fuzz.token_set_ratio(s, string), s),
                  acc.iterkeys())
    filtered = filter(lambda r: r[0] >= MATCH_THRESHOLD,
                      results)
    ranked = sorted(filtered)
    return dict(zip(["ratio", "key"], ranked[-1])) if ranked else None


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("searchstring")
    parser.add_argument("filename")
    parser.add_argument("-t", "--threshold",
                        type=int,
                        help="Fuzzy match percentage threshold")
    args = parser.parse_args()
    if args.threshold:
        MATCH_THRESHOLD = args.threshold
    aggregated = parse_file(args.filename,
                            lambda line: args.searchstring in line)
    for entry, matches in aggregated.iteritems():
        print("{:<10} {}".format(len(matches), entry))