Skip to content

Instantly share code, notes, and snippets.

@cidetraq
Created January 4, 2019 21:18
Show Gist options
  • Save cidetraq/6afac16d1ce229239d256d8cc19a7f91 to your computer and use it in GitHub Desktop.
Save cidetraq/6afac16d1ce229239d256d8cc19a7f91 to your computer and use it in GitHub Desktop.
Getting some object-oriented errors: the kmeans_process function is not recognized.
class SequenceFeatureEnricher(object):
    """Collect sequences and per-sequence summary features, batch by batch.

    Each call to :meth:`process` appends every sequence of the batch to
    ``sample_sequences`` and a matching list of derived features to
    ``sequence_features``.  ``sequence_features_scalar_map`` records, for each
    derived feature position, the input column index it was computed from.

    The column range that is enriched is ``range(d.ENRICH_START,
    d.NUM_INPUTS)``; ``d`` is a module-level name defined outside this class.
    """

    def __init__(self, regression_features=True, std_features=True, kmeans_features=True, masknan: float = None, n_clusters=8):
        """Store the configuration and build the feature-to-column map.

        Args:
            regression_features: When true, :meth:`process` emits two values
                (``m`` and ``b``) per enriched column.
            std_features: When true, :meth:`process` emits one NaN-ignoring
                standard deviation per enriched column.
            kmeans_features: Stored on the instance; not read by any method
                of this class.
            masknan: Stored on the instance; not read by any method of this
                class.
            n_clusters: Number of clusters used by :meth:`kmeans_process`.
        """
        self.regression_features = regression_features
        self.std_features = std_features
        self.kmeans_features = kmeans_features
        self.n_clusters = n_clusters
        self.masknan = masknan

        self.sample_sequences = []
        self.sequence_features = []

        # So we can map sequence features back to minmax values for scaling.
        # The order here mirrors the order in which process() emits features.
        self.sequence_features_scalar_map = []

        if regression_features:
            for column in range(d.ENRICH_START, d.NUM_INPUTS):
                # Two entries per column: one for m, one for b.
                self.sequence_features_scalar_map.extend((column, column))

        if std_features:
            for column in range(d.ENRICH_START, d.NUM_INPUTS):
                self.sequence_features_scalar_map.append(column)

    def kmeans_process(self, nd: np.ndarray):
        """Fit KMeans with ``self.n_clusters`` clusters on ``nd``.

        Args:
            nd: Data handed directly to ``KMeans.fit``.

        Returns:
            A ``(labels, cluster_centers)`` tuple taken from the fitted
            estimator's ``labels_`` and ``cluster_centers_`` attributes.
        """
        # Imported lazily so scikit-learn is only required when clustering.
        from sklearn.cluster import KMeans

        # ``n_jobs`` is intentionally not passed: it was deprecated in
        # scikit-learn 0.23 and removed in 1.0, where it raises TypeError.
        kmeans = KMeans(n_clusters=self.n_clusters, verbose=1).fit(nd)
        return kmeans.labels_, kmeans.cluster_centers_

    def process(self, nd: np.ndarray):
        """Append each sequence in ``nd`` and its derived features.

        Args:
            nd: Batch indexed as ``nd[sequence][:, column]``, i.e. each
                ``nd[sequence]`` must be 2-D.  Presumably laid out as
                (sequences, steps, inputs) -- confirm against the caller.

        Returns:
            None.  Results accumulate in ``self.sample_sequences`` and
            ``self.sequence_features`` (one entry each per sequence).
        """
        # Add some features
        for sequence in nd:
            features_to_add = []

            if self.regression_features:
                # Sum of the step indices 0..steps-1; identical for every
                # column, so compute it once per sequence.
                # NOTE(review): this is 0 when nd.shape[1] is 0 or 1, which
                # makes m inf/nan -- confirm callers never pass such batches.
                index_total = np.nansum(np.arange(0, nd.shape[1]))
                for column in range(d.ENRICH_START, d.NUM_INPUTS):
                    values = sequence[:, column]
                    m = np.nansum(values) / index_total
                    b = values[0]
                    features_to_add.extend([m, b])

            if self.std_features:
                for column in range(d.ENRICH_START, d.NUM_INPUTS):
                    features_to_add.append(np.nanstd(sequence[:, column]))

            self.sample_sequences.append(sequence)
            self.sequence_features.append(features_to_add)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment