Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
(Snippet) Reading a file with a progress bar (tqdm) that still works when tqdm is not installed.
#!env python3
"""TestResult scorer for MusicID (in-house project)."""
import mmap
import os
import re
import sys
try:
from tqdm import tqdm
except ImportError:
tqdm = lambda *i, **kwargs: i[0] # pylint:disable=invalid-name
# pylint: disable=too-few-public-methods
class FileReaderWithTQDM(object):
    """Base iterator that reads a file line by line behind a tqdm bar.

    The file's line count is passed to ``tqdm`` as ``total``.  Subclasses
    implement ``__next__`` and pull raw lines from ``self.tqdm``.
    """

    def __init__(self, filename, **kwargs):
        """Open *filename* for reading and wrap the handle with ``tqdm``.

        Args:
            filename: Path of the text file to read.
            **kwargs: Forwarded unchanged to ``tqdm``.
        """
        # Count first, so a failure here cannot leak an open read handle.
        total = self.get_line_number(filename)
        self._handle = open(filename, "r")
        # Subclasses call ``self.tqdm.__next__()``.  Whatever ``tqdm``
        # returns is only required to be iterable, so turn it into a real
        # iterator once; iter() on an object that already is an iterator
        # (a generator, a file object) returns that same object.
        self.tqdm = iter(tqdm(self._handle, total=total, **kwargs))

    def __iter__(self):
        """Return self; instances are their own iterator."""
        return self

    def close(self):
        """Close the underlying file handle (safe to call repeatedly)."""
        self._handle.close()

    @staticmethod
    def get_line_number(this):
        """Return the number of lines in the file at path *this*.

        A last line without a trailing newline still counts as a line, and
        an empty file has zero lines.  The file is opened read-only.
        """
        # Approach from http://stackoverflow.com/a/850962/1877942
        with open(this, "rb") as handle:
            # mmap refuses to map a zero-length file, so answer directly.
            if os.fstat(handle.fileno()).st_size == 0:
                return 0
            with mmap.mmap(handle.fileno(), 0,
                           access=mmap.ACCESS_READ) as buf:
                # readline() yields b"" once the end of the map is reached.
                return sum(1 for _ in iter(buf.readline, b""))

    def __next__(self):
        """Need to Implement with self.tqdm.__next__()."""
        raise NotImplementedError
class MappingTableLoader(FileReaderWithTQDM):
    """Reader for the MusicID mapping-table format.

    A data line holds two double-quoted fields separated by a single space;
    iteration yields them as a ``(first, second)`` tuple of strings.
    """

    REGEX = re.compile(r"\"(.*)\" \"(.*)\"")

    def __next__(self):
        """Return the next ``(first, second)`` pair found in the file.

        Lines that do not match ``REGEX`` (blank lines, headers, malformed
        rows) are skipped rather than returned as ``None``, so callers can
        always unpack the result into two names.

        Raises:
            StopIteration: when the underlying line iterator is exhausted.
        """
        while True:
            # StopIteration raised here propagates and ends the iteration.
            pairs = self.REGEX.findall(self.tqdm.__next__())
            if pairs:
                return pairs[0]
class ResultLoader(FileReaderWithTQDM):
    """Reader for the MusicID TestResult format.

    Each item is a dict with the stripped line that follows a
    "Seek Time = ..." marker under 'Data', and the captured text of the
    rank lines after it under 'Results'.
    """

    REGEX = {'findTest': re.compile("Seek Time = .*"),
             'getTestRank': re.compile("[0-9]+\t[0-9]+\t(.*)")}

    def __next__(self):
        """Return the next ``{'Data': ..., 'Results': [...]}`` record.

        Raises:
            StopIteration: when no line is left to serve as the data line.
        """
        stream = self.tqdm
        marker = self.REGEX['findTest']
        rank_line = self.REGEX['getTestRank']

        # Throw away everything up to and including the marker line.
        for skipped in stream:
            if marker.search(skipped) is not None:
                break

        # The line right after the marker is the record's data.
        data = self.tqdm.__next__().strip()

        # Gather rank lines; the first line that is not one ends the record.
        # NOTE(review): that terminating line is consumed and dropped --
        # confirm the format never puts the next marker directly there.
        results = []
        for candidate in stream:
            captured = rank_line.findall(candidate)
            if not captured:
                break
            results.extend(captured)
        return {'Data': data, 'Results': results}
def musicid_results_scorer(files):
    """Load both mapping tables, score every test result, print a summary.

    For each test record the rank is the zero-based position, within the
    first ten results, of the first result whose mapped file basename
    equals the test's mapped file basename.  Tests with no such result --
    including tests that list fewer than ten results -- are counted as
    NotFound.

    Args:
        files: Sequence of three paths, in order: train mapping table,
            test mapping table, result file.

    Raises:
        KeyError: if a test or result name (with ".pcm" appended) is not
            present in the corresponding mapping table.
    """
    train_table = dict(MappingTableLoader(
        files[0], desc='TrainData', leave=True))
    test_table = dict(MappingTableLoader(
        files[1], desc='TestData ', leave=True))

    not_found = 10  # scores[0..9] are hit ranks, scores[10] is NotFound
    scores = [0] * (not_found + 1)
    for test in ResultLoader(files[2], desc='Ranking ', leave=True):
        target = os.path.basename(test_table[test['Data'] + '.pcm'])
        # Default to NotFound so a short result list without a match is
        # not mistaken for a hit at position len(results).
        rank = not_found
        for position, cur in enumerate(test['Results'][:not_found]):
            if target == os.path.basename(train_table[cur + '.pcm']):
                rank = position
                break
        scores[rank] += 1

    total = sum(scores)

    def ratio(hits):
        """Return hits / total, or 0.0 when no test was scored."""
        return hits / total if total else 0.0

    # XXX(minhoryang.voice@gmail.com) : Chunghee asked to print like these.
    print('Scores:')
    for i in range(0, 10):
        print('\tHit %sth - %s' % (i+1, scores[i]))
    print('\tNotFound - %s' % (scores[10],))
    print("Top 1: %s" % (ratio(scores[0]),))
    print("Top 5: %s" % (ratio(sum(scores[:5])),))
    print("Top 10: %s" % (ratio(sum(scores[:10])),))
if __name__ == "__main__":
    # Expect exactly three arguments after the script name; otherwise
    # print the usage line instead of running the scorer.
    if len(sys.argv) == 4:
        musicid_results_scorer(sys.argv[1:])
    else:
        USAGE = "%s $TrainMappingTable $TestMappingTable $result"
        print(USAGE % (sys.argv[0],))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.