Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@mayhem
Created February 7, 2023 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mayhem/78d1c867e43885d42b09c7a3f9efc06f to your computer and use it in GitHub Desktop.
Save mayhem/78d1c867e43885d42b09c7a3f9efc06f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import json
import re
from time import time
from random import randint
import psycopg2
import psycopg2.extras
from unidecode import unidecode
import config
# Known-good lookup keys exercised by lookup_test(): each entry is corrupted with
# fuck_string_up(), searched for, and the best match's `lookup_nows` column is
# expected to equal the uncorrupted entry.
# NOTE(review): these look like artist + recording names, lowercased with all
# whitespace and punctuation removed -- confirm against how the
# mapping.content_resolver table is populated.
TEST_STRINGS = [
"massiveattackteardropbartclaessentomfallbootleg",
"squirrelnutzippersivefoundanewbaby",
"giantsand1helvakowboysong",
"ericserrathepantryhideout",
"bobdylanalabamagetaway",
"bobdylanleopardskinpillboxhatincomplete",
"thereplacementsyouaintgottadancestudiodemo",
"steviewonderijustcalledtosayiloveyou",
"thestonerosesmadeofstone808statemix",
"yolatengotomcourtenayacousticversion",
]
def encode_string(text):
    """Normalize *text* into a lowercase, transliterated lookup string.

    Steps, in order:
      1. drop every run of characters that is neither a word character
         (``\\w``) nor a space,
      2. collapse runs of spaces into a single space,
      3. strip leading/trailing whitespace and lowercase,
      4. pass the result through ``unidecode``.

    Returns the string produced by ``unidecode``.
    """
    without_punctuation = re.sub(r'[^\w ]+', '', text)
    single_spaced = re.sub(" +", " ", without_punctuation)
    normalized = single_spaced.strip().lower()
    return unidecode(normalized)
def fuck_string_up(text, num_chars_to_remove):
    """Return *text* with ``num_chars_to_remove`` randomly chosen characters deleted.

    Characters are removed one at a time from uniformly random positions, so
    the relative order of the surviving characters is preserved.

    Args:
        text: The string to corrupt.
        num_chars_to_remove: How many characters to delete. If it is greater
            than or equal to ``len(text)`` the result is the empty string.

    Returns:
        The corrupted string, exactly ``min(num_chars_to_remove, len(text))``
        characters shorter than *text*.
    """
    for _ in range(num_chars_to_remove):
        if not text:
            # Nothing left to delete; randint(0, -1) would raise ValueError.
            break
        # randint() includes BOTH endpoints, so the upper bound has to be the
        # last valid index. Using len(text) here would occasionally pick a
        # position past the end and silently delete nothing.
        remove = randint(0, len(text) - 1)
        text = text[:remove] + text[remove + 1:]
    return text
def lookup_test(num_chars_to_remove):
    """Time fuzzy lookups of corrupted TEST_STRINGS entries and report mismatches.

    For every entry in ``TEST_STRINGS`` a copy with ``num_chars_to_remove``
    random characters deleted is searched for in ``mapping.content_resolver``
    using the ``%`` similarity operator and ``similarity()`` function
    (presumably provided by PostgreSQL's pg_trgm extension -- confirm on the
    target database). The best match is compared with the uncorrupted entry;
    misses and wrong matches are printed, followed by the average time spent
    per lookup.

    Args:
        num_chars_to_remove: Number of characters deleted from each test
            string before it is looked up.

    Returns:
        None. Results are written to stdout.
    """
    print("Remove %s characters:" % num_chars_to_remove)

    # The statement is identical for every lookup, so build it once. "%%" is
    # the escaped form of a literal "%" (the similarity operator), required
    # because the query is executed with bound parameters.
    query = (
        "SELECT artist_name, recording_name, lookup_nows, similarity(lookup_nows, %s) AS sml\n"
        "FROM mapping.content_resolver\n"
        "WHERE lookup_nows %% %s\n"
        "ORDER BY sml DESC, lookup_nows\n"
        "LIMIT 1"
    )

    total_time = 0.0
    mb_conn = psycopg2.connect(config.MBID_MAPPING_DATABASE_URI)
    try:
        # `with connection` only wraps a transaction (commit/rollback); it does
        # not close the connection, hence the explicit close() in `finally`.
        with mb_conn:
            with mb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as curs:
                for text in TEST_STRINGS:
                    lookup = fuck_string_up(text, num_chars_to_remove)

                    # Time only the database round trip, not the reporting below.
                    t0 = time()
                    curs.execute(query, (lookup, lookup))
                    row = curs.fetchone()
                    total_time += time() - t0

                    if row is None:
                        # No row above the similarity threshold; there is
                        # nothing to compare, so do not touch `row`.
                        print("%s not found!!" % lookup)
                    elif text != row["lookup_nows"]:
                        print("Found wrong entry for:\n %s (search)\n %s (result)" % (text, row["lookup_nows"]))
    finally:
        mb_conn.close()

    print(f"avg time per lookup: {total_time/len(TEST_STRINGS)}\n")
if __name__ == "__main__":
    # Run the benchmark five times, deleting 1, 2, ..., 5 characters per string.
    for chars_removed in range(1, 6):
        lookup_test(chars_removed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment