Skip to content

Instantly share code, notes, and snippets.

@wonzbak
Created July 9, 2015 14:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wonzbak/3072c1ddf9fb1a224404 to your computer and use it in GitHub Desktop.
Save wonzbak/3072c1ddf9fb1a224404 to your computer and use it in GitHub Desktop.
Fuzzy file finder
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
"""Find a file in a list in a fuzzy way.
"""
from __future__ import division
import os
import sys
import time
import re
import readline # for history with up
import argparse
from pprint import pprint as pp
def benchIt(func):
"""Decorator, display execution time of a function.
"""
def wrapper(*args, **kwargs):
start = time.time()
ret = func(*args)
duration = time.time() - start
sys.stderr.write("{} executed in {:.5f}s\n".format(func.__name__, duration))
return ret
return wrapper
class AnsiColor:
"""ANSI escape sequences
"""
BLUE = '\033[94m'
BROWN = '\033[94m'
PURPLE = '\033[95m'
CYAN = '\033[96m'
DARKCYAN = '\033[36m'
GREEN = '\033[92m'
GREEN = '\033[92m'
RED = '\033[91m'
YELLOW = '\033[93m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
END = '\033[0m'
@staticmethod
def boldize(string):
return "%s%s%s" % (AnsiColor.BOLD, string, AnsiColor.END)
class Item (object):
"""Class representing an item which is a match
"""
def __init__(self, string, score, groups):
self.string = string
self.score = score
self.groups = groups
def __str__(self):
return self.string
def wrapItem(self, before, after):
"""Wrap match char with before and after.
"""
parts = []
nb_group = len(self.groups)
# split item string
for i, group in enumerate(self.groups):
first = True if i == 0 else False
last = True if i == nb_group - 1 else False
if first:
# from start to begining of first group
startPos = 0
endPos = self.groups[i]['start']
parts.append(self.string[startPos:endPos])
else:
# between 2 groups
startPos = self.groups[i - 1]['end']
endPos = self.groups[i]['start']
parts.append(self.string[startPos:endPos])
if last:
# from last end group to end of string
endPos = self.groups[i]['end']
parts.append(self.string[endPos:])
# highlight char.
wrapped_string = ""
for i, part in enumerate(parts):
wrapped_string += part
if i < nb_group:
wrapped_string += "{}{}{}".format(before, self.groups[i]['car'], after)
return wrapped_string
def highlightToTerminal(self):
return self.wrapItem(AnsiColor.BOLD, AnsiColor.END)
class FuzzySearch(object):
def __init__(self, collection):
self.collection = collection
@staticmethod
def getNbCharBetweenGroup(groups):
"""Count character between groups
"""
nb_char = 0
for i, group in enumerate(groups):
if i == 0:
continue
nb_char += groups[i]['start'] - groups[i - 1]['end']
return nb_char
def searchReverse(self, search):
"""Filter items base on search filter, same as fuzzyFind
but with inverted search and items.
"""
filtered_items = []
pattern_parts = []
r_search = search[::-1] # reversed search
escape_car = ['.']
for car in r_search:
if car in escape_car:
pattern_parts.append(r"(\%s)" % car)
else:
pattern_parts.append(r"(%s)" % car)
pattern = '.*?'.join(pattern_parts)
regex = re.compile(pattern, re.I)
for item in self.collection:
r_item = item[::-1] # reversed item
match = regex.search(r_item)
if match:
groups = []
for i, group in enumerate(match.groups()):
groupData = {
'start': match.span(i + 1)[0],
'end': match.span(i + 1)[1],
'car': group
}
groups.append(groupData)
nb_chars = FuzzySearch.getNbCharBetweenGroup(groups)
# the less there is character between each group, the more the score is
# the less there is character between the last group and the end of line,
# the more th score is
score = (len(search) / len(item) * 100) + (1 / (nb_chars + match.start()) * 10000)
# reverse groups
item_length = len(item)
reversed_groups = list(reversed(groups))
for group_ in reversed_groups:
start = item_length - group_['end']
end = item_length - group_['start']
group_['start'] = start
group_['end'] = end
filtered_items.append(
(
score,
item,
reversed_groups,
)
)
return [Item(item, score_, group)
for score_, item, group in sorted(filtered_items, reverse=True)]
def fuzzyFind(self, search):
"""Filter items base on search filter
"""
filtered_items = []
pattern_parts = []
for car in search:
pattern_parts.append("(%s)" % car)
pattern = '.*?'.join(pattern_parts)
print "DEBUG Pattern: {}".format(pattern)
regex = re.compile(pattern, re.I)
for item in self.collection:
match = regex.search(item)
if match:
groups = []
for i, group in enumerate(match.groups()):
groupData = {
'start': match.span(i + 1)[0],
'end': match.span(i + 1)[1],
'car': group
}
groups.append(groupData)
nb_chars = FuzzySearch.getNbCharBetweenGroup(groups)
score = nb_chars + match.start()
filtered_items.append(
(
score,
item,
groups,
)
)
return [Item(item, score_, start_, group)
for score_, start_, item, group in sorted(filtered_items)]
class Application(object):
"""Load file of a directory and filter them.
"""
def __init__(self,):
self.args = self.cli()
self.files = self.loadFiles(self.args.directory, self.args.ext)
self.fuzzySearcher = FuzzySearch(self.files)
def cli(self):
parser = argparse.ArgumentParser(description='fuzzy search on file')
parser.add_argument('directory', help='root directory')
parser.add_argument('--ext', default=None, help='include only files with an extension')
parser.add_argument('-f', '--filter', default=None, help='filter to apply on the files list')
parser.add_argument('--with-score', default=False, action="store_true", help='Display score')
parser.add_argument('-v', '--verbosity', action="count", help='Verbosity')
return parser.parse_args()
def run(self):
"""Run the command.
"""
if self.args.filter:
self.filterFiles(self.args.filter)
else:
# interactive command line
try:
self.userInputloop()
except (KeyboardInterrupt):
pass
except (EOFError):
# exit
pass
def loadFiles(self, directory=None, extension=None):
"""Return list of all file in the directory with the extension.
"""
if directory is None:
directory = '.'
directory = os.path.abspath(os.path.expanduser(directory))
if not os.path.isdir(directory):
raise Exception("Directory {} not exist".format(directory))
files = []
root_position = len(directory) + 1
start_time = time.time()
for dirpath, dirnames, filenames in os.walk(directory):
for name in filenames:
projectPath = dirpath[root_position:]
if extension is not None:
if name.endswith(".{}".format(extension)):
files.append("{}/{}".format(projectPath, name))
else:
files.append("{}/{}".format(projectPath, name))
duration = time.time() - start_time
if self.args.verbosity:
if extension is not None:
sys.stderr.write(
"Got {} {} files from {} in {:.5f}s\n"
.format(len(files), extension, directory, duration))
else:
sys.stderr.write("Got {} files from {} in {:.5f}s" .format(len(files), directory, duration))
return files
def filterFiles(self, filter_):
"""Display filtered file.
"""
start_time = time.time()
filtered_files = self.fuzzySearcher.searchReverse(filter_)
duration = time.time() - start_time
for item in filtered_files:
if self.args.with_score:
print '{:.0f} {}'.format(item.score, item.highlightToTerminal())
else:
print '{}'.format(item.highlightToTerminal())
if (self.args.verbosity):
sys.stderr.write("Got {} item(s) in {:.5f}s\n".format(len(filtered_files), duration))
def userInputloop(self):
while True:
try:
user_input = raw_input('filter: ')
if user_input:
self.filterFiles(user_input)
except KeyboardInterrupt as e:
print e
if __name__ == '__main__':
app = Application()
app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment