Last active
July 24, 2020 11:23
-
-
Save johentsch/b2266ed31398fb37e968c7e36da8bbf6 to your computer and use it in GitHub Desktop.
Function for sorting DCML harmony labels with respect to their features given a custom ordering logic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import pandas as pd | |
import numpy as np | |
from urllib.request import urlopen | |
REGEX = {} | |
def features2type(numeral, form=None, figbass=None):
    """ Turns a combination of the three chord features into a chord type.

    Parameters
    ----------
    numeral : :obj:`str`
        Roman numeral of the chord. If it is all lowercase the underlying
        triad is minor, otherwise major (unless `form` says otherwise).
    form : :obj:`str`, optional
        Chord form: '', 'o', '+', '%', 'M' or '+M'. Missing values count as ''.
    figbass : :obj:`str`, optional
        Figured bass. '', '6' and '64' designate triads, any other value a
        seventh chord. Missing values count as ''.

    Returns
    -------
    'M':    Major triad
    'm':    Minor triad
    'o':    Diminished triad
    '+':    Augmented triad
    'mm7':  Minor seventh chord
    'Mm7':  Dominant seventh chord
    'MM7':  Major seventh chord
    'mM7':  Minor major seventh chord
    'o7':   Diminished seventh chord
    '%7':   Half-diminished seventh chord
    '+7':   Augmented (minor) seventh chord
    '+M7':  Augmented major seventh chord

    A missing `numeral` is returned unchanged. ``None`` is returned (and a
    message printed) if a form that implies a seventh chord ('%', 'M', '+M')
    is combined with the triad inversions '6' or '64'.
    """
    if pd.isnull(numeral):
        return numeral
    form, figbass = ('' if pd.isnull(val) else val for val in (form, figbass))
    # triads
    if figbass in ('', '6', '64'):
        if form in ('o', '+'):
            return form
        if form in ('%', 'M', '+M'):
            if figbass != '':
                print(f"{form} is a seventh chord and cannot have figbass '{figbass}'")
                return None
            # These forms only exist as seventh chords: with an empty figbass
            # fall through to the seventh-chord logic so that the result is one
            # of the documented types (e.g. 'MM7'/'mM7' instead of 'M7').
        else:
            return 'm' if numeral.islower() else 'M'
    # seventh chords
    if form in ('o', '%', '+', '+M'):
        return f"{form}7"
    triad = 'm' if numeral.islower() else 'M'
    seventh = 'M' if form == 'M' else 'm'
    return f"{triad}{seventh}7"
def make_type_col(df, num_col='numeral', form_col='form', fig_col='figbass'):
    """ Compute the chord type of every row of `df` and return it as a Series.

    Uses: features2type()

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Table holding one chord per row.
    num_col, form_col, fig_col : :obj:`str`, optional
        Names of the columns that are passed to features2type() as
        numeral, form and figbass, in that order.

    Returns
    -------
    :obj:`pandas.Series`
        Chord types named 'chordtype', sharing the index of `df`.
    """
    feature_rows = list(df[[num_col, form_col, fig_col]].itertuples(index=False, name=None))
    # Evaluate every distinct feature combination only once.
    type_of = {}
    for combination in set(feature_rows):
        type_of[combination] = features2type(*combination)
    chord_types = [type_of[row] for row in feature_rows]
    return pd.Series(chord_types, index=df.index, name='chordtype')
def sort_labels(labels, git_branch='master', drop_duplicates=True, verbose=True, **kwargs):
    """ Sort a list of DCML labels following custom criteria.

    Uses: split_labels(), make_type_col()

    Parameters
    ----------
    labels : :obj:`collection` or :obj:`pandas.Series`
        The labels you want to sort.
    git_branch : :obj:`str`, optional
        The branch of the DCMLab/standards repo from which you want to use the regEx.
    drop_duplicates : :obj:`bool`, optional
        By default, the function returns an ordered list of unique labels. Set to
        False in order to keep duplicate labels. Note that where the ordered features
        are identical, labels appear in the order of their occurrence.
    verbose : :obj:`bool`, optional
        By default, values that are missing from custom orderings (and keyword
        arguments that do not name a feature) are printed out.
        Pass False to prevent that.
    kwargs : {'values', 'occurrences', 'rvalues', 'roccurrences'}, :obj:`dict`, :obj:`list` or callable
        Pass one argument for every feature that you want to sort in the order
        in which features should be used for sorting. The arguments will be mapped
        on the respective columns which should yield alpha-numeric values to be sorted.

        * 'values' / 'rvalues': ascending / descending order of the values.
        * 'occurrences' / 'roccurrences': most / least frequent values first.
        * dict: maps every value to its sort key; values missing from the
          dict are sorted last.
        * list (or other iterable): values in the desired order; values of the
          column that are missing from it are appended in order of occurrence.
        * callable: is called on every value and returns its sort key.

        Available features:
            globalkey
            localkey
            pedal
            chord
            numeral
            form
            figbass
            changes
            relativeroot
            pedalend
            phraseend
            chordtype

    Returns
    -------
    :obj:`pandas.Series`
        The sorted labels with their original index values. Labels that
        split_labels() drops because the regEx does not match are not included.

    Raises
    ------
    ValueError
        If no keyword argument is passed, if none of them names an available
        feature, or if a string other than the four keywords is passed as order.
    TypeError
        If `labels` is a DataFrame.

    Examples
    --------
    .. highlight:: python

        # Sort numerals by occurrences (descending), the figbass by occurrences (ascending), and
        # the form column by the given order
        sort_labels(labels, numeral='occurrences', figbass='roccurrences', form=['', '+', 'o', '%', 'M'])

        # Sort numerals by custom ordering and each numeral by the (globally) most frequent chord types.
        sort_labels(labels, numeral=['I', 'V', 'IV'], chordtype='occurrences')

        # Sort relativeroots alphabetically and the numerals by a custom ordering which
        # is equivalent to ['V', 'vii', '#vii']
        sort_labels(labels, relativeroot='rvalues', numeral={'vii': 5, 'V': 0, '#vii': 10})

        # Sort chord types by occurrences starting with the least frequent and sort their inversions
        # following the given custom order
        sort_labels(labels, chordtype='roccurrences', figbass=['2', '43', '65', '7'])
    """
    if len(kwargs) == 0:
        raise ValueError("Pass at least one keyword argument for sorting...")
    if not isinstance(labels, pd.Series):
        if isinstance(labels, pd.DataFrame):
            raise TypeError("Pass only one column of your DataFrame.")
        labels = pd.Series(labels)
    if drop_duplicates:
        labels = labels.drop_duplicates()
    # Split a positionally indexed copy: the sorted positions can then be
    # applied with .iloc, which stays correct even if the index of `labels`
    # contains duplicate values (.loc would repeat such rows).
    features = split_labels(labels.reset_index(drop=True), git_branch)

    def make_order_dict(col, ordering):
        """Map every value of `col` to its rank in `ordering`, unlisted values last."""
        if iter(ordering) is ordering:
            # One-shot iterators (generators, reversed(...)) would be consumed
            # by the membership tests below, so materialize them first.
            ordering = list(ordering)
        missing = [v for v in col.unique() if v not in ordering]
        if len(missing) > 0 and verbose:
            print(f"The following values were missing in the custom ordering for column {col.name}:\n{missing}")
        return {v: i for i, v in enumerate(list(ordering) + missing)}

    def make_keys(col, order):
        """Turn the values of `col` into sort keys according to `order`."""
        if isinstance(order, str):
            if order == 'values':
                ranking = sorted(set(col))
            elif order == 'rvalues':
                ranking = sorted(set(col), reverse=True)
            elif order == 'occurrences':
                ranking = col.value_counts(dropna=False).index
            elif order == 'roccurrences':
                ranking = col.value_counts(dropna=False, ascending=True).index
            else:
                raise ValueError(f"Unknown ordering '{order}' for column {col.name}. Pass one of "
                                 f"'values', 'rvalues', 'occurrences', 'roccurrences', "
                                 f"a dict, a list or a callable.")
            order_dict = make_order_dict(col, ranking)
        elif isinstance(order, dict):
            order_dict = order
        elif callable(order):
            return [order(v) for v in col]
        else:
            order_dict = make_order_dict(col, order)
        # Plain lists (rather than np.vectorize) also work for empty columns
        # and for values without a key (None), which pandas sorts last.
        return [order_dict.get(v) for v in col]

    if 'chordtype' in kwargs:
        features['chordtype'] = make_type_col(features)
    ignored = [col for col in kwargs if col not in features.columns]
    if ignored and verbose:
        print(f"The following keyword arguments are not features and were ignored:\n{ignored}")
    key_cols = {col: make_keys(features[col], order) for col, order in kwargs.items() if col in features.columns}
    if len(key_cols) == 0:
        raise ValueError(f"None of the keyword arguments is a feature. Choose from {list(features.columns) + ['chordtype']}")
    df = pd.DataFrame(key_cols, index=features.index)
    # mergesort is stable, so ties keep their order of occurrence even when
    # sorting by a single column (multi-column sorts are stable anyway).
    ordered_positions = df.sort_values(by=list(df.columns), kind='mergesort').index
    return labels.iloc[np.asarray(ordered_positions, dtype=int)]
def split_labels(labels, git_branch='master', dropna=True):
    """ Split DCML harmony labels into their respective features using the regEx
    from the indicated branch of the DCMLab/standards repository.

    The compiled regEx is cached in the module-level dict REGEX, so that every
    branch is downloaded only once per session.

    Parameters
    ----------
    labels : :obj:`pandas.Series`
        Harmony labels to be split.
    git_branch : :obj:`str`, optional
        The branch of the DCMLab/standards repo from which you want to use the regEx.
    dropna : :obj:`bool`, optional
        Drop rows where the regEx didn't match.

    Returns
    -------
    :obj:`pandas.DataFrame`
        One column per feature ('globalkey', 'localkey', 'pedal', 'chord',
        'numeral', 'form', 'figbass', 'changes', 'relativeroot', 'pedalend',
        'phraseend'), indexed like `labels`. Missing features are empty strings.
    """
    if git_branch not in REGEX:
        url = f"https://raw.githubusercontent.com/DCMLab/standards/{git_branch}/harmony.py"
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(url) as response:
            source = response.read()
        glo, loc = {}, {}
        # NOTE(security): this executes code downloaded from the network. Only
        # use branches of the repository that you trust.
        exec(source, glo, loc)
        # The downloaded module is expected to define the pattern as `regex`.
        REGEX[git_branch] = re.compile(loc['regex'], re.VERBOSE)
    regex = REGEX[git_branch]
    cols = ['globalkey', 'localkey', 'pedal', 'chord', 'numeral', 'form', 'figbass', 'changes', 'relativeroot', 'pedalend', 'phraseend']
    # Selecting by name presumably relies on the regEx defining one named group
    # per feature -- verify against the downloaded pattern.
    res = labels.str.extract(regex, expand=True)[cols]
    if dropna:
        # Rows in which none of the features matched are dropped.
        return res.dropna(how='all').fillna('')
    return res.fillna('')
# labels = pd.read_csv('labels.csv').label | |
# sort_labels(labels, chordtype='roccurrences') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment