# p4p_scores.py
# GitHub gist JamesOwers/afc8163f4a5da84587079147bb9bf9a0 by @JamesOwers
# (last active March 12, 2019).
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
def _clean_notes(df):
    """Return the notes of ``df`` as a list of ``(onset, pitch)`` tuples.

    Pitches are cast to int and onsets are rounded to 2 decimal places; these
    roundings are done in the original evaluation code linked in the
    references of ``get_scores``. ``df`` itself is not modified.
    """
    onsets = df['onset'].round(2).tolist()
    pitches = df['pitch'].astype(int).tolist()
    return list(zip(onsets, pitches))


def _count_common_pitches(true_pitches, gen_pitches):
    """Count pitches present in both lists, matching each occurrence once."""
    true_sorted = sorted(true_pitches)
    gen_sorted = sorted(gen_pitches)
    ptr1 = ptr2 = nr_matches = 0
    # Merge-style walk over the two sorted lists
    while ptr1 < len(true_sorted) and ptr2 < len(gen_sorted):
        if true_sorted[ptr1] == gen_sorted[ptr2]:
            nr_matches += 1
            ptr1 += 1
            ptr2 += 1
        elif true_sorted[ptr1] < gen_sorted[ptr2]:
            ptr1 += 1
        else:
            ptr2 += 1
    return nr_matches


def _precision_recall_f1(nr_matches, gen_size, true_size):
    """Return ``(precision, recall, f1)``; f1 is 0 when both others are 0."""
    precision = nr_matches / gen_size
    recall = nr_matches / true_size
    if (precision + recall) != 0:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0
    return precision, recall, f1


def get_scores(true_df, gen_df, start_onset, increment=1.0, max_t=None):
    """
    Calculates the ioi (inter-ontime interval), pitch and combo scores of a
    generated continuation against the true continuation, at the times
    increment, 2*increment, ..., max_t after start_onset.

    References
    ----------
    https://www.music-ir.org/mirex/wiki/2018:Patterns_for_Prediction#IOI
    https://github.com/BeritJanssen/PatternsForPrediction/blob/master/evaluate_prediction.py

    Parameters
    ----------
    true_df: DataFrame
        A dataframe containing: the onset times (measured in number of
        quarter notes since start) of the true continuation, and the pitches.
        The dataframe should have columns 'onset' and 'pitch' relating to the
        onset times and the midinote pitch number respectively
    gen_df: DataFrame
        As above, but for the generated continuation
    start_onset: float
        the onset time of the final note played in the prime associated with
        these continuations
    increment: float
        number of quarter notes to increment by in each iteration
    max_t: float, optional
        the maximum time in crotchets to get the score for. If None, gets
        scores till all onsets in both arrays have been assessed

    Returns
    -------
    scores: dict
        Keyed by score name ('ioi', 'pitch', 'combo'), then by metric
        ('precision', 'recall', 'f1'). Each metric maps tt (the time since
        start_onset, i.e. ii*increment) to the value computed over all notes
        with onset <= start_onset + tt:
        ioi: matches are onsets present in both continuations
        pitch: matches are pitches present in both continuations
        combo: matches are (onset, pitch) pairs present in both continuations
    """
    if max_t is None:
        # An empty frame has a NaN maximum and Python's max() is
        # order-dependent when NaN is involved, so drop NaNs first.
        last_onsets = [
            onset
            for onset in (true_df['onset'].max(), gen_df['onset'].max())
            if pd.notna(onset)
        ]
        max_t = int(max(last_onsets) - start_onset) if last_onsets else 0

    score_names = ['ioi', 'pitch', 'combo']
    scores = {name: {'precision': {}, 'recall': {}, 'f1': {}}
              for name in score_names}

    # Clean and extract the notes once; each step only filters these lists
    all_true_notes = _clean_notes(true_df)
    all_gen_notes = _clean_notes(gen_df)

    # range is only for ints, not floats. Round before truncating so that a
    # quotient such as 0.3 / 0.1 == 2.9999999999999996 still gives 3 steps.
    nr_steps = int(round(max_t / increment, 9))
    for ii in range(1, nr_steps + 1):
        tt = ii * increment
        max_onset = start_onset + tt
        true_notes = [note for note in all_true_notes if note[0] <= max_onset]
        gen_notes = [note for note in all_gen_notes if note[0] <= max_onset]
        true_size = len(true_notes)
        gen_size = len(gen_notes)

        if true_size == 0 or gen_size == 0:
            # Both empty: perfect agreement. Only one empty: everything is 0.
            # There is an argument that the undefined metric (recall with no
            # true notes, precision with no generated notes) should be NaN,
            # but it shouldn't be 1 because there is an implicit 'rest' note.
            value = 1 if true_size == gen_size else 0
            for name in score_names:
                scores[name]['precision'][tt] = value
                scores[name]['recall'][tt] = value
                scores[name]['f1'][tt] = value
            continue

        true_onsets, true_pitches = [list(x) for x in zip(*true_notes)]
        gen_onsets, gen_pitches = [list(x) for x in zip(*gen_notes)]
        # TODO: dedup polyphonic submissions? For now, assuming input is
        # monophonic
        # https://github.com/BeritJanssen/PatternsForPrediction/issues/5
        nr_matches = {
            # TODO: the IOI score needs to be adapted for polyphonic case
            'ioi': len(set(true_onsets).intersection(gen_onsets)),
            'pitch': _count_common_pitches(true_pitches, gen_pitches),
            'combo': len(set(true_notes).intersection(gen_notes)),
        }
        for name in score_names:
            precision, recall, f1 = _precision_recall_f1(
                nr_matches[name], gen_size, true_size
            )
            scores[name]['precision'][tt] = precision
            scores[name]['recall'][tt] = recall
            scores[name]['f1'][tt] = f1
    return scores
def get_scores_old(true_df, gen_df, start_onset, increment=1.0, max_t=None):
    """
    Scores a generated continuation exactly as the competition code did.

    Takes the same arguments and returns the same nested dict
    (score name -> metric -> {tt: value}) as get_scores. It is kept for
    comparison only: whenever either continuation has no notes up to the
    current time, or precision + recall is zero, the affected values are
    NaN rather than numbers. See notes of get_scores for details.
    """
    if max_t is None:
        last_onset = max(true_df.onset.max(), gen_df.onset.max())
        max_t = int(last_onset - start_onset)

    metric_names = ('precision', 'recall', 'f1')
    score_names = ['ioi', 'pitch', 'combo']
    scores = {}
    for name in score_names:
        scores[name] = {metric: {} for metric in metric_names}

    def cleaned_notes(frame):
        # Work on a copy so the caller's frame is untouched. These roundings
        # are done in the original evaluation code linked in the get_scores
        # docstring references.
        frame = frame.copy(deep=True)
        frame.loc[:, 'pitch'] = frame.loc[:, 'pitch'].astype(int)
        frame.loc[:, 'onset'] = frame.loc[:, 'onset'].round(2)
        return [(row.onset, row.pitch) for _, row in frame.iterrows()]

    def store(name, tt, nr_matches, gen_size, true_size):
        precision = nr_matches / gen_size
        recall = nr_matches / true_size
        # This is one of the issues: an undefined F1 is reported as NaN
        if (precision + recall) == 0:
            f1 = np.nan
        else:
            f1 = (2*precision*recall)/(precision + recall)
        scores[name]['precision'][tt] = precision
        scores[name]['recall'][tt] = recall
        scores[name]['f1'][tt] = f1

    every_true_note = cleaned_notes(true_df)
    every_gen_note = cleaned_notes(gen_df)

    nr_steps = int(max_t / increment)  # range is only for ints, not floats
    for tt in (ii*increment for ii in range(1, nr_steps+1)):
        cutoff = start_onset + tt
        true_notes = [note for note in every_true_note if note[0] <= cutoff]
        gen_notes = [note for note in every_gen_note if note[0] <= cutoff]
        true_size, gen_size = len(true_notes), len(gen_notes)

        # This is one of the issues - we should handle size zero cases
        if not (true_size and gen_size):
            for name in score_names:
                for metric in metric_names:
                    scores[name][metric][tt] = np.nan
            continue

        # TODO: dedup polyphonic submissions? For now, assuming input is
        # monophonic
        # https://github.com/BeritJanssen/PatternsForPrediction/issues/5

        # IOI score =======
        # TODO: This needs to be adapted for polyphonic case
        shared_onsets = ({onset for onset, _ in true_notes}
                         & {onset for onset, _ in gen_notes})
        store('ioi', tt, len(shared_onsets), gen_size, true_size)

        # Pitch score =======
        sorted_true = sorted(pitch for _, pitch in true_notes)
        sorted_gen = sorted(pitch for _, pitch in gen_notes)
        i = j = pitch_matches = 0
        while i < true_size and j < gen_size:
            lhs, rhs = sorted_true[i], sorted_gen[j]
            if lhs < rhs:
                i += 1
            elif lhs > rhs:
                j += 1
            else:
                pitch_matches += 1
                i += 1
                j += 1
        store('pitch', tt, pitch_matches, gen_size, true_size)

        # Combo score =======
        shared_notes = set(true_notes) & set(gen_notes)
        store('combo', tt, len(shared_notes), gen_size, true_size)
    return scores
if __name__ == '__main__':
    # Script entry point: read the prime / foil / true continuation csv files
    # of one dataset size, score every foil continuation against the true one
    # with both get_scores and get_scores_old, and show one line plot per
    # (score type, metric) pair comparing the two.
    import os  # local import: only this script section needs it

    # Change to point towards a folder containing the unzipped data
    DATA_LOC = './data'
    # Change to run on different sizes
    SIZE = 'small'
    SIZE_PATH = f'{DATA_LOC}/PPDD-Jul2018_aud_mono_{SIZE}'
    DESCRIPTOR_PATH = f'{SIZE_PATH}/descriptor'
    COLNAMES = ['onset', 'pitch', 'morph', 'dur', 'ch']

    def get_fn(path):
        """Return the file name of ``path`` up to its first '.'."""
        # basename instead of splitting on '/' so that platform-specific
        # separators returned by glob are handled too
        return os.path.basename(path).split('.')[0]

    def read_part(part):
        """Read every file in the ``{part}_csv`` folder, keyed by get_fn."""
        return {get_fn(path): pd.read_csv(path, names=COLNAMES)
                for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}

    def to_long_frame(all_scores, fns, score_type, metric, label):
        """Melt one metric of ``all_scores`` into columns fn, t, score and
        tag every row with ``label`` in the score_type column."""
        data = {fn: all_scores[fn][score_type][metric] for fn in fns}
        long_df = (pd.DataFrame
                   .from_dict(data, orient='index')
                   .reset_index()
                   .rename(columns={'index': 'fn'})
                   .melt(id_vars=['fn'], var_name='t', value_name='score')
                   )
        long_df['score_type'] = label
        return long_df

    print('Reading csv files')
    prime = read_part('prime')
    cont_foil = read_part('cont_foil')
    cont_true = read_part('cont_true')
    fn_list = list(prime.keys())
    if not fn_list:
        # Fail with a clear message instead of an IndexError further down
        raise SystemExit(f'No csv files found in {SIZE_PATH}/prime_csv')

    print('Scoring compositions with new scores')
    scores = {}
    for fn in tqdm(fn_list):
        # The continuation is scored from the onset of the last prime note
        scores[fn] = get_scores(cont_true[fn], cont_foil[fn],
                                prime[fn].onset.iloc[-1],
                                increment=0.5, max_t=10.0)

    print('Scoring compositions with old scores')
    old_scores = {}
    for fn in tqdm(fn_list):
        old_scores[fn] = get_scores_old(cont_true[fn], cont_foil[fn],
                                        prime[fn].onset.iloc[-1],
                                        increment=0.5, max_t=10.0)

    for score_type in ['pitch', 'ioi', 'combo']:
        for metric in ['recall', 'precision', 'f1']:
            df = to_long_frame(scores, fn_list, score_type, metric,
                               'revised_scores')
            df2 = to_long_frame(old_scores, fn_list, score_type, metric,
                                'published_scores')
            plt.figure()
            sns.lineplot(x='t', y='score', hue='score_type',
                         data=pd.concat((df, df2), axis=0))
            plt.title(f'{score_type} score, {metric} metric')
            # plt.ylim([0, 1])
    plt.show()
# (GitHub page footer: sign in on GitHub to comment on this gist.)