Created
February 7, 2023 17:41
-
-
Save mayhem/78d1c867e43885d42b09c7a3f9efc06f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json | |
import re | |
from time import time | |
from random import randint | |
import psycopg2 | |
import psycopg2.extras | |
from unidecode import unidecode | |
import config | |
# Lookup keys used as benchmark inputs. lookup_test() degrades each one by
# deleting random characters, queries the database with the degraded string and
# expects the best match's "lookup_nows" value to equal the original entry.
TEST_STRINGS = [
    "massiveattackteardropbartclaessentomfallbootleg",
    "squirrelnutzippersivefoundanewbaby",
    "giantsand1helvakowboysong",
    "ericserrathepantryhideout",
    "bobdylanalabamagetaway",
    "bobdylanleopardskinpillboxhatincomplete",
    "thereplacementsyouaintgottadancestudiodemo",
    "steviewonderijustcalledtosayiloveyou",
    "thestonerosesmadeofstone808statemix",
    "yolatengotomcourtenayacousticversion",
]
def encode_string(text):
    """Normalize ``text`` into a lower-case, punctuation-free string.

    Steps, in order:
      1. drop every run of characters that are neither word characters nor spaces,
      2. collapse runs of spaces into a single space,
      3. strip leading/trailing whitespace and lower-case the result,
      4. pass the result through ``unidecode``.
    """
    without_punctuation = re.sub(r'[^\w ]+', '', text)
    single_spaced = re.sub(" +", " ", without_punctuation)
    return unidecode(single_spaced.strip().lower())
def fuck_string_up(text, num_chars_to_remove):
    """Return ``text`` with randomly chosen characters deleted.

    Args:
        text: The string to degrade.
        num_chars_to_remove: How many characters to delete. If this is greater
            than or equal to ``len(text)``, the empty string is returned.

    Returns:
        A copy of ``text`` that is exactly ``min(num_chars_to_remove, len(text))``
        characters shorter; the surviving characters keep their relative order.
    """
    for _ in range(num_chars_to_remove):
        if not text:
            # Nothing left to delete; also keeps randint() from getting an empty range.
            break
        # randint() includes BOTH endpoints, so the upper bound has to be the
        # last valid index. Using len(text) would sometimes pick a position one
        # past the end, where the slicing below deletes nothing.
        remove = randint(0, len(text) - 1)
        text = text[:remove] + text[remove + 1:]
    return text
def lookup_test(num_chars_to_remove):
    """Benchmark similarity lookups for degraded versions of ``TEST_STRINGS``.

    For every entry in ``TEST_STRINGS`` this deletes ``num_chars_to_remove``
    random characters, asks the database for the single most similar
    ``lookup_nows`` value in ``mapping.content_resolver`` and reports lookups
    that found nothing or found a different entry. The average query time per
    lookup is printed at the end.

    Args:
        num_chars_to_remove: Number of characters deleted from each test string
            before it is looked up.
    """
    print("Remove %s characters:" % num_chars_to_remove)

    # "%%" is a literal "%" operator escaped for psycopg2's parameter
    # substitution (presumably the pg_trgm similarity operator - confirm
    # against the database setup). The query never changes, so build it once.
    query = """SELECT artist_name, recording_name, lookup_nows, similarity(lookup_nows, %s) AS sml
                 FROM mapping.content_resolver
                WHERE lookup_nows %% %s
             ORDER BY sml DESC, lookup_nows
                LIMIT 1"""

    total_time = 0.0
    mb_conn = psycopg2.connect(config.MBID_MAPPING_DATABASE_URI)
    try:
        # Using a psycopg2 connection as a context manager only commits or
        # rolls back the transaction; the connection is closed in ``finally``.
        with mb_conn, mb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as curs:
            for text in TEST_STRINGS:
                lookup = fuck_string_up(text, num_chars_to_remove)

                t0 = time()
                curs.execute(query, (lookup, lookup))
                row = curs.fetchone()
                # Stop the clock right after the fetch so that reporting below
                # does not inflate the measured lookup time.
                total_time += time() - t0

                if row is None:
                    print("%s not found!!" % lookup)
                    # No row to compare against; indexing None would raise.
                    continue
                if text != row["lookup_nows"]:
                    print("Found wrong entry for:\n %s (search)\n %s (result)" % (text, row["lookup_nows"]))
    finally:
        mb_conn.close()

    print(f"avg time per lookup: {total_time/len(TEST_STRINGS)}\n")
if __name__ == "__main__":
    # Run the benchmark five times, removing 1, 2, 3, 4 and finally 5
    # characters from every test string.
    for chars_removed in range(1, 6):
        lookup_test(chars_removed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment