#!/usr/bin/env python
from elm import ELM
from sklearn.cluster import KMeans
import numpy as np


class ClusteringBasedELM(ELM):
    """
    Clustering-based extreme learning machine (CBELM), which aims to improve
    the classification performance of the plain ELM.

    The difference between the plain ELM and CBELM is how the random hidden
    weights are generated. In CBELM, the row vectors of the random hidden
    weight matrix are generated around cluster centers obtained by a
    clustering algorithm (here, k-means).

    The expectation was that the activation function of CBELM would be easier
    to activate and thus give higher performance, because each input vector is
    closer (in Euclidean distance) to some of the weight row vectors.
    In practice, however, CBELM does not reach a satisfactory performance :(.
    One reason is that k-means does not work well on complex data sets.

    This script requires the ELM implementation from
    https://github.com/masaponto/Python-ELM .

    Copyright 2017 masaponto
    Published under the MIT LICENSE.
    """

    def __init__(self, hid_num, n_clusters=4, radius=0.1, km_iter=300, n_init=10, a=1):
        # n_clusters, km_iter and n_init are passed to k-means;
        # radius controls how far hidden weights may deviate from the cluster centers.
        self.hid_num = hid_num
        self.n_clusters = n_clusters
        self.radius = radius
        self.km_iter = km_iter
        self.n_init = n_init
        self.a = a
        super().__init__(hid_num, a)

    def __generate_hidden_weight(self, n_feature):
        # Each cluster center gets (hid_num // n_clusters) hidden weight rows.
        n = self.hid_num // self.n_clusters
        centers = self.kmeans.cluster_centers_
        centers = np.repeat(centers, n, axis=0)

        # Fill up to hid_num rows by duplicating randomly chosen centers.
        diff = self.hid_num - centers.shape[0]
        inds = np.random.randint(centers.shape[0], size=diff)
        centers = np.append(centers, centers[inds], axis=0)
        assert centers.shape == (self.hid_num, n_feature)

        # Perturb each center with uniform noise within the given radius.
        w = np.random.uniform(-self.radius, self.radius, (self.hid_num, n_feature))
        return w + centers

    def fit(self, X, y):
        """
        Learning.

        Args:
            X [[float]]: feature vectors of training data
            y [float]: labels of training data
        """
        # number of classes == number of output neurons
        self.out_num = max(y)

        # add bias to feature vectors
        X = self._add_bias(X)

        # fit k-means to obtain the cluster centers
        self.kmeans = KMeans(
            init='k-means++', n_clusters=self.n_clusters,
            max_iter=self.km_iter, n_init=self.n_init)
        self.kmeans.fit(X)

        # convert labels to one-vs-rest vectors for multi-class problems
        if self.out_num != 1:
            y = np.array([self._ltov(self.out_num, _y) for _y in y])

        # generate weights between the input layer and the hidden layer
        self.W = self.__generate_hidden_weight(X.shape[1])

        # solve for the output weights via the pseudo-inverse of the hidden layer output
        _H = np.linalg.pinv(self._sigmoid(np.dot(self.W, X.T)))
        self.beta = np.dot(_H.T, y)

        return self


def main():
    from sklearn.preprocessing import normalize
    from sklearn.datasets import fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'australian'
    data_set = fetch_mldata(db_name)
    data_set.data = normalize(data_set.data)

    X_train, X_test, y_train, y_test = train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    elm = ELM(hid_num=10).fit(X_train, y_train)
    celm = ClusteringBasedELM(hid_num=10).fit(X_train, y_train)

    print("ELM Acc. %0.3f " % elm.score(X_test, y_test))
    print("CBELM Acc. %0.3f " % celm.score(X_test, y_test))


if __name__ == "__main__":
    main()