# Last active July 24, 2020 11:23
# Function for sorting DCML harmony labels with respect to their features given a custom ordering logic.
import re
import pandas as pd
import numpy as np
from urllib.request import urlopen
# Cache of compiled regular expressions, keyed by git branch name.
# split_labels() fills it on first use of a branch and reuses the entry afterwards.
REGEX = {}
def features2type(numeral, form=None, figbass=None):
    """ Turns a combination of the three chord features into a chord type.

    Parameters
    ----------
    numeral : :obj:`str`
        Roman numeral of the chord. Its case decides between major and minor.
    form : :obj:`str`, optional
        One of '', 'o', '+', '%', 'M', '+M'. Null values are treated as ''.
    figbass : :obj:`str`, optional
        Figured bass of the chord. '', '6' and '64' denote triad inversions,
        anything else is treated as a seventh chord inversion. Null values
        are treated as ''.

    Returns
    -------
    :obj:`str` or None
        'M':    Major triad
        'm':    Minor triad
        'o':    Diminished triad
        '+':    Augmented triad
        'mm7':  Minor seventh chord
        'Mm7':  Dominant seventh chord
        'MM7':  Major seventh chord
        'mM7':  Minor major seventh chord
        'o7':   Diminished seventh chord
        '%7':   Half-diminished seventh chord
        '+7':   Augmented (minor) seventh chord
        '+M7':  Augmented major seventh chord

        A null `numeral` is returned unchanged. None is returned (and a message
        printed) if a seventh-chord form is combined with the triad inversions
        '6' or '64'.
    """
    if pd.isnull(numeral):
        return numeral
    form = '' if pd.isnull(form) else form
    figbass = '' if pd.isnull(figbass) else figbass

    if figbass in ('', '6', '64'):
        if form in ('o', '+'):
            return form
        if form not in ('%', 'M', '+M'):
            return 'm' if numeral.islower() else 'M'
        # '%', 'M' and '+M' only exist as seventh chords.
        if figbass != '':
            print(f"{form} is a seventh chord and cannot have figbass '{figbass}'")
            return None
        # No figbass given: fall through and classify as a seventh chord so that
        # the result is one of the documented types (e.g. 'MM7' instead of 'M7').

    # seventh chords
    if form in ('o', '%', '+', '+M'):
        return f"{form}7"
    triad = 'm' if numeral.islower() else 'M'
    seventh = 'M' if form == 'M' else 'm'
    return f"{triad}{seventh}7"
def make_type_col(df, num_col='numeral', form_col='form', fig_col='figbass'):
    """ Create a new Series with the chord type for every row of `df`.
    Uses: features2type()

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        DataFrame containing the three feature columns.
    num_col, form_col, fig_col : :obj:`str`, optional
        Names of the columns holding numeral, form and figured bass.

    Returns
    -------
    :obj:`pandas.Series`
        Series named 'chordtype' sharing the index of `df`.
    """
    feature_cols = [num_col, form_col, fig_col]
    rows = list(df[feature_cols].itertuples(index=False, name=None))
    # Compute the chord type only once per distinct feature combination.
    type_of = {}
    for combo in set(rows):
        type_of[combo] = features2type(*combo)
    chordtypes = [type_of[combo] for combo in rows]
    return pd.Series(chordtypes, index=df.index, name='chordtype')
def _make_sort_keys(col, order, verbose=True):
    """ Translate the values of one feature column into sortable keys.

    Parameters
    ----------
    col : :obj:`pandas.Series`
        The feature column.
    order : {'values', 'rvalues', 'occurrences', 'roccurrences'}, :obj:`dict`, iterable or callable
        Ordering criterion, see :func:`sort_labels`.
    verbose : :obj:`bool`, optional
        Print values of `col` that are missing from a custom ordering.

    Returns
    -------
    :obj:`numpy.ndarray`
        One sort key per element of `col`, in the same order.
    """
    def rank_values(ordered):
        # Materialise first: a one-shot iterator would be consumed by the
        # membership tests below and yield an incomplete ranking.
        ordered = list(ordered)
        known = set(ordered)
        missing = [v for v in col.unique() if v not in known]
        if missing and verbose:
            print(f"The following values were missing in the custom ordering for column {col.name}:\n{missing}")
        # Values not mentioned in the ordering are ranked after all others.
        return {v: i for i, v in enumerate(ordered + missing)}

    if isinstance(order, str):
        if order == 'values':
            ranking = sorted(set(col))
        elif order == 'rvalues':
            ranking = sorted(set(col), reverse=True)
        elif order == 'occurrences':
            ranking = col.value_counts(dropna=False).index
        elif order == 'roccurrences':
            ranking = col.value_counts(dropna=False, ascending=True).index
        else:
            raise ValueError(f"Unknown ordering '{order}' for column {col.name}. "
                             f"Use one of 'values', 'rvalues', 'occurrences', 'roccurrences'.")
        order_dict = rank_values(ranking)
    elif isinstance(order, dict):
        # A dict is used as is; values it does not contain get the key None.
        order_dict = order
    elif callable(order):
        return col.map(order).to_numpy()
    else:
        order_dict = rank_values(order)
    # Series.map applies element-wise and, unlike np.vectorize, copes with
    # empty input and with keys of mixed type.
    return col.map(order_dict.get).to_numpy()


def sort_labels(labels, git_branch='master', drop_duplicates=True, verbose=True, **kwargs):
    """ Sort a list of DCML labels following custom criteria.
    Uses: split_labels()

    Parameters
    ----------
    labels : :obj:`collection` or :obj:`pandas.Series`
        The labels you want to sort.
    git_branch : :obj:`str`, optional
        The branch of the DCMLab/standards repo from which you want to use the regEx.
    drop_duplicates : :obj:`bool`, optional
        By default, the function returns an ordered list of unique labels. Set to
        False in order to keep duplicate labels. Note that where the ordered features
        are identical, labels appear in the order of their occurrence.
    verbose : :obj:`bool`, optional
        By default, values that are missing from custom orderings are printed out.
        Pass False to prevent that.
    kwargs : {'values', 'occurrences', 'rvalues', 'roccurrences'}, :obj:`dict`, :obj:`list` or callable
        Pass one argument for every feature that you want to sort in the order
        in which features should be used for sorting. The arguments will be mapped
        on the respective columns which should yield alpha-numeric values to be sorted.
        Keyword names that are not feature columns are ignored.

    Returns
    -------
    :obj:`pandas.Series`
        The labels that matched the regEx, in sorted order.

    Raises
    ------
    ValueError
        If no keyword argument is passed or an unknown ordering keyword is used.
    TypeError
        If `labels` is a DataFrame.

    Example
    -------
    .. highlight:: python

        # Sort numerals by occurrences (descending), the figbass by occurrences (ascending), and
        # the form column by the given order
        sort_labels(labels, numeral='occurrences', figbass='roccurrences', form=['', '+', 'o', '%', 'M'])

        # Sort numerals by custom ordering and each numeral by the (globally) most frequent chord types.
        sort_labels(labels, numeral=['I', 'V', 'IV'], chordtype='occurrences')

        # Sort relativeroots alphabetically and the numerals by a custom ordering which
        # is equivalent to ['V', 'vii', '#vii']
        sort_labels(labels, relativeroot='rvalues', numeral={'vii': 5, 'V': 0, '#vii': 10})

        # Sort chord types by occurrences starting with the least frequent and sort their inversions
        # following the given custom order
        sort_labels(labels, chordtype='roccurrences', figbass=['2', '43', '65', '7'])
    """
    if not kwargs:
        raise ValueError("Pass at least one keyword argument for sorting...")
    if isinstance(labels, pd.DataFrame):
        raise TypeError("Pass only one column of your DataFrame.")
    if not isinstance(labels, pd.Series):
        labels = pd.Series(labels)
    if drop_duplicates:
        labels = labels.drop_duplicates()
    features = split_labels(labels, git_branch)
    if 'chordtype' in kwargs:
        features['chordtype'] = make_type_col(features)
    key_cols = {col: _make_sort_keys(features[col], order, verbose)
                for col, order in kwargs.items()
                if col in features.columns}
    df = pd.DataFrame(key_cols, index=features.index)
    # A stable algorithm keeps ties in their order of occurrence when sorting
    # by a single column (several columns are sorted stably anyway).
    ordered_ix = df.sort_values(by=df.columns.to_list(), kind='mergesort').index
    return labels.loc[ordered_ix]
def split_labels(labels, git_branch='master', dropna=True):
    """ Split DCML harmony labels into their respective features using the regEx
    from the indicated branch of the DCMLab/standards repository.

    Parameters
    ----------
    labels : :obj:`pandas.Series`
        Harmony labels to be split.
    git_branch : :obj:`str`, optional
        The branch of the DCMLab/standards repo from which you want to use the regEx.
    dropna : :obj:`bool`, optional
        Drop rows where the regEx didn't match.

    Returns
    -------
    :obj:`pandas.DataFrame`
        One column per feature; features absent from a label are empty strings.
    """
    if git_branch not in REGEX:
        # NOTE(review): the URL in this file had been truncated to "{git_branch}/",
        # which urlopen() rejects. Host and path were restored to match the
        # repository named in the docstring; the file name 'harmony.py' should be
        # confirmed against the DCMLab/standards repo.
        url = f"https://raw.githubusercontent.com/DCMLab/standards/{git_branch}/harmony.py"
        with urlopen(url) as response:
            source = response.read()
        glo, loc = {}, {}
        # SECURITY: this executes code downloaded from the network in order to
        # obtain its `regex` variable. Only use branches of a repository you trust.
        exec(source, glo, loc)
        REGEX[git_branch] = re.compile(loc['regex'], re.VERBOSE)
    regex = REGEX[git_branch]
    cols = ['globalkey', 'localkey', 'pedal', 'chord', 'numeral', 'form', 'figbass',
            'changes', 'relativeroot', 'pedalend', 'phraseend']
    res = labels.str.extract(regex, expand=True)[cols]
    if dropna:
        # Rows in which no group matched at all belong to labels the regEx rejected.
        res = res.dropna(how='all')
    return res.fillna('')
# labels = pd.read_csv('labels.csv').label
# sort_labels(labels, chordtype='roccurrences')
