Skip to content

Instantly share code, notes, and snippets.

@marcwebbie
Last active December 27, 2015 08:29
Show Gist options
  • Save marcwebbie/7297141 to your computer and use it in GitHub Desktop.
Save marcwebbie/7297141 to your computer and use it in GitHub Desktop.
Testing the performance of different ways of filtering a list for the best match
from timeit import timeit
from difflib import SequenceMatcher
import difflib
import re
# Lower-case search words every strategy tries to find in a song title.
word_list = [
    "balla",
    "baby",
]

# Candidate song titles that the matching strategies are benchmarked against.
song_list = [
    "Give Em Some Mo",
    "Fall'n (Feat. G.I.B.)",
    "Balla Baby",
    "Jackpot The Pimp Pt. 2 (Skit)",
    "Leave Wit Me (Feat. R. Kelly & Ziggy)",
    "Make That Ass Talk (Feat. Ziggy)",
    "I Do",
    "We Clubbin",
    "We Do (Feat. Bun B)",
    "Wurr Da Git It Gurlz At (Feat. G.I.B.)",
    "Bring Da Beef (Feat. G.I.B.)",
    "Powerballin' (Outro)",
    "Balla Baby (Remix) (Feat. Lil' Flip & Boozie)",
    "What Up Wit It (Feat. G.I.B.)",
    "Don't Really Care",
]
def get_best_match_dict(words=None, songs=None):
    """Return the best-matching song title, collecting scores in a dict.

    A song's score is the number of search words found among its
    lower-cased, whitespace-separated tokens, minus its total token count,
    so among titles matching the same number of words the shorter one wins.
    Songs that match no word are not considered. Ties are resolved by the
    dict's iteration order (insertion order on Python 3.7+).

    Args:
        words: Lower-case search words. Defaults to the module-level
            ``word_list``.
        songs: Candidate song titles. Defaults to the module-level
            ``song_list``.

    Returns:
        The title with the highest score.

    Raises:
        ValueError: If no song contains any of the search words.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    scores = {}
    for song in songs:
        # Tokenize once per song instead of once per (song, word) pair.
        tokens = song.lower().split()
        match_count = sum(1 for word in words if word in tokens)
        if match_count:
            scores[song] = match_count - len(tokens)

    if not scores:
        # Same exception type max() would raise on an empty dict, but with a
        # message that explains what actually went wrong.
        raise ValueError("no song matches any of the search words")
    return max(scores, key=scores.get)
def get_best_match_list(words=None, songs=None):
    """Return the best-matching song title, collecting scores in a list.

    A song's score is the number of search words found among its
    lower-cased, whitespace-separated tokens, minus its total token count.
    Songs that match no word are not considered. Candidates are compared as
    ``(score, title)`` tuples, so equal scores are resolved in favour of the
    title that compares greatest as a string.

    Args:
        words: Lower-case search words. Defaults to the module-level
            ``word_list``.
        songs: Candidate song titles. Defaults to the module-level
            ``song_list``.

    Returns:
        The title belonging to the greatest ``(score, title)`` tuple.

    Raises:
        ValueError: If no song contains any of the search words.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    scored = []
    for song in songs:
        # Tokenize once per song instead of once per (song, word) pair.
        tokens = song.lower().split()
        match_count = sum(1 for word in words if word in tokens)
        if match_count:
            scored.append((match_count - len(tokens), song))

    if not scored:
        # Same exception type max() would raise on an empty list, but with a
        # message that explains what actually went wrong.
        raise ValueError("no song matches any of the search words")
    return max(scored)[1]
def get_best_match_regex(words=None, songs=None):
    """Return the best ``(score, title)`` pair, counting matches with a regex.

    A song's score is the number of non-overlapping occurrences of any
    search word in the lower-cased title, minus the title's whitespace token
    count. Occurrences are found anywhere in the text, so a word embedded in
    a longer token counts too. Unlike the dict/list variants, songs with no
    match are still candidates, and the whole ``(score, title)`` tuple is
    returned rather than just the title.

    Args:
        words: Lower-case search words, matched literally. Defaults to the
            module-level ``word_list``.
        songs: Candidate song titles. Defaults to the module-level
            ``song_list``.

    Returns:
        The greatest ``(score, title)`` tuple; equal scores are resolved in
        favour of the title that compares greatest as a string.

    Raises:
        ValueError: If ``songs`` is empty.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    # Escape each word so regex metacharacters in it ("(", ".", "+", ...)
    # are matched literally, and compile the alternation once for all songs.
    # An empty word list must not produce an empty pattern: that would match
    # at every position of every title.
    pattern = None
    if words:
        pattern = re.compile('|'.join(re.escape(word) for word in words))

    def score(song):
        hits = len(pattern.findall(song.lower())) if pattern else 0
        return hits - len(song.split())

    return max((score(song), song) for song in songs)
def get_best_match_difflib(words=None, songs=None):
    """Return the song title most similar to the space-joined search words.

    Similarity is ``SequenceMatcher.ratio()`` between the joined words and
    the lower-cased title. When several titles share the highest ratio, the
    first of them in ``songs`` is returned.

    Args:
        words: Lower-case search words. Defaults to the module-level
            ``word_list``.
        songs: Candidate song titles. Defaults to the module-level
            ``song_list``.

    Returns:
        The title with the highest similarity ratio.

    Raises:
        ValueError: If ``songs`` is empty.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    query = ' '.join(words)

    def similarity(song):
        # Argument order is kept as (query, title): ratio() may depend on it.
        return SequenceMatcher(None, query, song.lower()).ratio()

    return max(songs, key=similarity)
def get_best_match_difflib_close_match(words=None, songs=None):
    """Return the closest song title according to ``difflib.get_close_matches``.

    The space-joined search words are compared against the lower-cased
    titles, so letter case does not affect the similarity, in line with the
    other matching strategies in this file. The title is returned in its
    original spelling; if several titles share a lower-cased form, the first
    one in ``songs`` represents them.

    Args:
        words: Lower-case search words. Defaults to the module-level
            ``word_list``.
        songs: Candidate song titles. Defaults to the module-level
            ``song_list``.

    Returns:
        The original title of the closest match.

    Raises:
        IndexError: If no title reaches ``get_close_matches``' default
            similarity cutoff.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    query = ' '.join(words)

    # Map each lower-cased title back to the title as originally written.
    originals = {}
    for song in songs:
        originals.setdefault(song.lower(), song)

    # Only the single best candidate is used, so ask for just one.
    matches = difflib.get_close_matches(query, list(originals), n=1)
    if not matches:
        # Same exception type the original ``[0]`` lookup raised, with a
        # message that names the actual problem.
        raise IndexError("no song is a close enough match for %r" % query)
    return originals[matches[0]]
# Report line: benchmark label, total seconds for all runs, statement result.
fstr = "Function: {}, Time: {}, result: {}"

# One entry per benchmark: (label, statement to time, timeit setup code).
# The setup imports from __main__, so the benchmarks only work when this
# file is executed as a script.
_INLINE_SETUP = 'from __main__ import word_list, song_list'
_BENCHMARKS = [
    ('get_best_match_dict()',
     'get_best_match_dict()',
     'from __main__ import get_best_match_dict'),
    ('get_best_match_list()',
     'get_best_match_list()',
     'from __main__ import get_best_match_list'),
    ('max + list comprehension',
     'max([(len([w for w in word_list if w in song.lower().split()]), song) for song in song_list], key=lambda x: x[0] - len(x[1].split()))',
     _INLINE_SETUP),
    ('max + generator expression',
     'max((len([w for w in word_list if w in song.lower().split()]) - len(song.split()), song) for song in song_list)',
     _INLINE_SETUP),
    ('get_best_match_regex()',
     'get_best_match_regex()',
     'from __main__ import get_best_match_regex'),
    ('get_best_match_difflib()',
     'get_best_match_difflib()',
     'from __main__ import get_best_match_difflib'),
    ('get_best_match_difflib_close_match()',
     'get_best_match_difflib_close_match()',
     'from __main__ import get_best_match_difflib_close_match'),
]


def _run_benchmarks(number=1000):
    """Time every benchmark statement and print one report line for each.

    Args:
        number: How many times ``timeit`` executes each statement.
    """
    for label, stmt, setup in _BENCHMARKS:
        elapsed = timeit(stmt, setup=setup, number=number)
        # Show what the timed statement itself evaluates to, rather than the
        # result of an unrelated strategy. ``stmt`` is always one of the
        # hard-coded strings above, never external input, so eval is safe.
        result = eval(stmt, globals())
        print(fstr.format(label, elapsed, result))


if __name__ == "__main__":
    _run_benchmarks()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment