Skip to content

Instantly share code, notes, and snippets.

@dkohlsdorf
Created August 31, 2020 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dkohlsdorf/84fbb994ab0f3cf9894dd326dcd41886 to your computer and use it in GitHub Desktop.
Save dkohlsdorf/84fbb994ab0f3cf9894dd326dcd41886 to your computer and use it in GitHub Desktop.
Labeling By Majority
import shutil
import os
import pandas as pd
import numpy as np
def type_label(x, num_types=4):
    """Return the most frequent type label in ``x`` (majority vote).

    Args:
        x: Iterable of type labels. Each label is used as an index into a
            count vector of length ``num_types``. Integral floats such as
            ``2.0`` are accepted and treated as the corresponding integer.
        num_types: Number of distinct type labels to count. Defaults to 4.

    Returns:
        The result of ``np.argmax`` over the per-label counts, i.e. the
        label with the highest count. On a tie ``np.argmax`` picks the
        first maximum, so the lowest tied label wins; an empty ``x``
        therefore yields 0.

    Raises:
        IndexError: If a label is not a valid index into the count vector
            (for example a non-integral float or a value >= ``num_types``).
    """
    counts = np.zeros(num_types)
    for label in x:
        # Values pulled out of a pandas row can arrive upcast to float
        # (e.g. 2.0); NumPy rejects float indices, so convert integral
        # floats back to int. Anything else is indexed unchanged.
        if isinstance(label, (float, np.floating)) and float(label).is_integer():
            label = int(label)
        counts[label] += 1
    return np.argmax(counts)
# Gather, for every cluster id, the 'type' values of all rows assigned to it
# across the clustering CSV files found in the current directory.
cluster_labels = {}
for filename in os.listdir('.'):
    is_clustering_csv = (filename.startswith('test_sequential_clustering')
                         and filename.endswith('.csv'))
    if not is_clustering_csv:
        continue
    df = pd.read_csv(filename, sep=',')
    # Read the two columns directly rather than via DataFrame.iterrows():
    # iterrows() builds one Series per row and upcasts mixed int/float rows
    # to float, which would turn integer labels and cluster ids into floats
    # (breaking label indexing and the cluster id embedded in file names).
    for cluster, label in zip(df['cluster'], df['type']):
        cluster_labels.setdefault(cluster, []).append(label)

# Replace each cluster's list of labels with its majority label, then copy
# the cluster's .wav file to a name prefixed with "clicks" when the majority
# label is 2 and with "shape" otherwise.
for c in cluster_labels:
    cluster_labels[c] = type_label(cluster_labels[c])
    cname = "test_sequential_seq_cluster_{}.wav".format(c)
    if cluster_labels[c] == 2:
        outname = "clicks_test_sequential_seq_cluster_{}.wav".format(c)
    else:
        outname = "shape_test_sequential_seq_cluster_{}.wav".format(c)
    # Clusters without a matching .wav file are skipped.
    if os.path.exists(cname):
        shutil.copyfile(cname, outname)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment