Skip to content

Instantly share code, notes, and snippets.

@deanishe
Created October 29, 2013 23:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deanishe/7224860 to your computer and use it in GitHub Desktop.
Save deanishe/7224860 to your computer and use it in GitHub Desktop.
Benchmarking filtering of Spotlight (mdfind) search results.
#!/usr/bin/env python
# encoding: utf-8
"""
Benchmark filtering of Spotlight (mdfind) search results.

Each query in QUERIES is run through ``mdfind`` ITERATIONS times. The
results of every run are then filtered three different ways -- by
directory prefix (EXCLUDE_DIRS), by glob pattern (EXCLUDE_PATTERNS) and
by filename extension (EXCLUDE_FILETYPES) -- individually and all
together, and the time taken by each approach is printed along with a
summary at the end.
"""
from __future__ import print_function
import sys
import os
from subprocess import check_output
from time import time
from fnmatch import fnmatch
from collections import defaultdict
# Number of times the whole set of queries is run.
ITERATIONS = 10

# Folders the Spotlight search is restricted to (``~`` is expanded).
SEARCH_PATHS = [os.path.expanduser(folder) for folder in (
    u'~/',
)]

# Spotlight queries to benchmark.
QUERIES = [u'name:present', u'name:the']

# Results located under any of these folders are dropped by the
# directory filter (``~`` is expanded).
EXCLUDE_DIRS = [os.path.expanduser(folder) for folder in (
    u'~/Documents/ESL/Planning',
    u'~/Dropbox',
    u'~/Desktop',
    u'~/Library',
    u'~/Code',
    u'~/Google Drive',
)]

# Glob patterns (matched with ``fnmatch``) of results to drop.
EXCLUDE_PATTERNS = [
    u'*.oo3', u'*.pdf', u'*.docx', u'*.xls', u'*.md',
    u'*.py', u'*.jpg', u'*.png', u'*.odt', u'*.doc',
]

# The same file types expressed as plain filename extensions.
EXCLUDE_FILETYPES = [
    u'.oo3', u'.pdf', u'.docx', u'.xls', u'.md',
    u'.py', u'.jpg', u'.png', u'.odt', u'.doc',
]
def spotlight(query=None, folders=None):
    """Run ``mdfind`` and return the paths it prints.

    Args:
        query: Spotlight query string, passed as the final command-line
            argument. Left off the command when falsy.
        folders: Optional iterable of directories. Each one is passed to
            ``mdfind`` as an ``-onlyin`` restriction.

    Returns:
        A list of unicode strings, one per non-blank line of output.
        Leading and trailing whitespace is stripped from every line, so
        whitespace at either end of a path is not preserved.

    Raises:
        subprocess.CalledProcessError: If ``mdfind`` exits with a
            non-zero status.
        OSError: If the ``mdfind`` executable cannot be started.
    """
    args = [u'mdfind']
    for folder in folders or ():
        args.extend([u'-onlyin', folder])
    if query:
        args.append(query)
    output = check_output(args).decode(u'utf-8')
    # Strip each line once, then discard the ones that end up empty
    # (e.g. the remainder after the final newline).
    stripped = (line.strip() for line in output.split(u'\n'))
    return [line for line in stripped if line]
def _exclude_dirs(paths):
    """Return `paths` without those starting with an EXCLUDE_DIRS entry."""
    for folder in EXCLUDE_DIRS:
        paths = [p for p in paths if not p.startswith(folder)]
    return paths


def _exclude_globs(paths):
    """Return `paths` without those matching an EXCLUDE_PATTERNS glob."""
    for pattern in EXCLUDE_PATTERNS:
        paths = [p for p in paths if not fnmatch(p, pattern)]
    return paths


def _exclude_filetypes(paths):
    """Return `paths` without those ending in an EXCLUDE_FILETYPES entry."""
    for ext in EXCLUDE_FILETYPES:
        paths = [p for p in paths if not p.lower().endswith(ext)]
    return paths


def _exclude_all(paths):
    """Apply the directory, filetype and glob filters, in that order."""
    return _exclude_globs(_exclude_filetypes(_exclude_dirs(paths)))


def _time_filter(key, label, filter_func, paths, times):
    """Time `filter_func(paths)`, report it and record it in `times[key]`."""
    started = time()
    remaining = filter_func(paths)
    elapsed = time() - started
    print(u'{} results after {} in {:0.4f} secs'.format(
        len(remaining), label, elapsed))
    times[key].append(elapsed)


def main():
    """Benchmark the result filters and print per-run and average timings.

    For each of ITERATIONS rounds, every query in QUERIES is run through
    `spotlight` (restricted to SEARCH_PATHS). The raw search and each
    filtering approach are timed separately, always starting from the
    unfiltered Spotlight results. A summary of the average time per
    measurement is printed at the end.

    Returns:
        0, which the script uses as its exit status.
    """
    print(u'Benchmarking folder excludes ...')
    # Maps measurement name -> list of elapsed times, one entry per
    # query per iteration.
    times = defaultdict(list)
    # (key in `times`, label used in the progress message, filter).
    filters = [
        (u'dir filter', u'dir filters', _exclude_dirs),
        (u'glob filter', u'glob filters', _exclude_globs),
        (u'filetype filter', u'filetype filters', _exclude_filetypes),
        (u'all filters', u'all filters', _exclude_all),
    ]
    for _ in range(ITERATIONS):
        print(u'')
        for query in QUERIES:
            print(u"Searching for '{}' ...\n".format(query))
            started = time()
            spotlight_results = spotlight(query, SEARCH_PATHS)
            elapsed = time() - started
            print(u'{} Spotlight results in {:0.4f} secs'.format(
                len(spotlight_results), elapsed))
            times[u'raw spotlight'].append(elapsed)
            for key, label, filter_func in filters:
                _time_filter(key, label, filter_func, spotlight_results,
                             times)
            # NOTE(review): separator assumed to follow each query's
            # report -- confirm against the intended output layout.
            print(u'-' * 70)
    print(u'')
    print(u'RESULTS')
    print(u'=======\n')
    for key, samples in sorted(times.items()):
        # Every query contributes a sample in every iteration, so divide
        # by the number of samples rather than by ITERATIONS.
        average = sum(samples) / len(samples)
        print(u'{} : {:0.4f} secs average'.format(key, average))
    print(u'')
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment