jpjacobs/som.ijs

## som.ijs
NB. Self-Organising map
require 'math/lapack/geev'
require 'tables/csv'
require 'viewmat'
NB. closeall_jviewmat_ ''
NB. parameters:
N         =: 30     NB. side of neuron map. (N -> (*:N) neurons)
ntrain    =: 25     NB. number of training samples per class
niter     =: 10000  NB. number of iterations
'a0 an'   =: 25 1 NB. Starting and ending learning factor
sizenei   =: 7      NB. side of the gaussian neighborhood
neighmode =: 'cap'  NB. Neighbor mode: 'tor' for toroidal wrapping
                    NB.   or 'cap' for capping at the edges
NB. =========================================================

NB. load data from http://archive.ics.uci.edu/ml/datasets/Iris
iris =: }: readcsv jpath '~temp/FisherIris.csv'
NB. split data and labels in training and mapping data
X =: ".&> 4 {."1 iris     NB. 150x4 data
X =: (% +/&.:*:)"1 X      NB. Normalize data.
classes =: ~. {:"1 iris   NB. 3 classes
Y =: classes i. {:"1 iris NB. 150 labels
NB. Take 25 samples of each class.
TrainId =: , (({~ntrain ? #)/. i.@#) Y
Xt =: X #~    TrainId e.~ i.#X
Xm =: X #~ -. TrainId e.~ i.#X
Yt =: Y #~    TrainId e.~ i.#Y
Ym =: Y #~ -. TrainId e.~ i.#Y
NB. =========================================================

NB. Utility functions
center =: -"1 +/%# 			NB. subtract column means
mp     =: +/ .*			NB. matrix product
cov    =: (mp"2 1)~&.|: ] % <:^:(1&<)@#	NB. Covariance (note, does NOT center)
NB. Principle component analysis
NB. x PCA y : x: number of principle components to return
NB.           y: data
pca =: 4 : 0
require 'math/lapack'
require 'math/lapack/geev'
c=. cov center y
'vec val' =. 2b110 geev_jlapack_ c
NB. column means
cm  =. (+/%#) y
vec =. x {."1 vec
val =. x {. val
vec;(vec +/ .*~ y -"1 cm);((+/\%+/)val);cm
)
NB. Squared euclidean distance
sqeuclidean =: +/@:*:@(-"1)
NB. Gaussian kernel
gauss =: +:@*:@[ (^@-@(%~ +/&:*:@]) % o.@[) (,:|:)@:(] #"0 i:@-:@<:@])
NB. Normalizes a filter, such that the sum of all elements equals 1.
normalize =: (% +/@,)

NB. Neighbors (and self) of linear index y in array of size x
neighbors  =: ((, normalize gauss sizenei) ,. (,/,."0 1/~ i:<.-:sizenei) +"1 #:)
NB. Weights and neighbors for eligible points (eg, in the map).
wncap =: [ (] #~ [ ( >"1 *./"1@:*. 0 0 <:"1 ]) }."1@]) neighbors NB. cap neighbors at edges
wntor =: [ ({.@] , [ | }.@])"1 neighbors                        NB. toroidal
wn =: ('wn',neighmode)~ NB. fix chosen mode

NB. Linear alpha between starting and stopping value
linalpha =: 2 : '(m + ] * (n - m) % [)'
NB. x alpha y : give value of alpha for x iterations in total, for iteration y
alpha =: a0 linalpha an

NB. Training verb:
NB. niterations;[Neurons] train data
NB. if Neurons are not given, they will be initialized
train =: 4 :0
'nit neur' =. 2$<^:(0=L.)y
t=.? x $ #y       NB. randomize input vectors
if. neur -: nit do. NB. initial neurons not given, initialize below
  v =.0{:: 2 pca y  NB. initialize weights as per 2 PCA's
  neur =. |: v +/ . * _0.5 +  ?(2 ,N,N)$ 0 NB. make linear combinations of first 2 PC's
  NB. neur =. ?(N, N , D) $0 NB. alternative: use random Neurons to start
end.

for_s. i.x do.
  samp =.(y{~t{~s)
  NB. find closest neighbor
  bmu  =. {. samp (I.@:= <./)@:,@:(sqeuclidean) neur
  NB. find it's neighbors and associated weights
  'wei nei'  =. ({."1 ; <@:(<@}."1)) (}: $neur) wn bmu
  NB. update neighboring neurons to move closer to the selected sample
  neur =. ((+ (x alpha s) * wei * (samp-"1])) nei{neur ) nei} neur
end.
neur
)

NB. Map inputs to neurons
NB. Neurons map Xm;Ym
map =: 4 : 0
'dat lab' =. 2$<^:(0=L.)y
if. dat -: lab do. NB. y had only one element.
  lab =. 0
end.
NB. find closest vector, put label in location.
dists    =. x sqeuclidean"1/ dat
results  =. (>:lab) ((N,N) <"1@([#: {.@I.@:(= <./)@:,@])"2 |: dists)} (N,N)$0
)
NB. make a U-matrix, showing euclidean distances between neighboring neurons
Umat =: |:@:((1 1 0 ,:3 3 4)&((+/%#)@,@(sqeuclidean/~)@:(,"2) ;. _3))
NB. =========================================================
NB. Run the analysis
Neurons =: niter train Xt
viewmat (_2 + N,N) $, Umat Neurons
viewmat ((Xt;Yt) (,. (>:#classes) ,. ])&(Neurons&map) (Xm;Ym)) ; 'test Vs. mapped'
viewmat"2 |: Neurons
	NB. Self-Organising map
	require 'math/lapack/geev'
	require 'tables/csv'
	require 'viewmat'
	NB. closeall_jviewmat_ ''
	NB. parameters:
	N =: 30 NB. side of neuron map. (N -> (*:N) neurons)
	ntrain =: 25 NB. number of training samples per class
	niter =: 10000 NB. number of iterations
	'a0 an' =: 25 1 NB. Starting and ending learning factor
	sizenei =: 7 NB. side of the gaussian neighborhood
	neighmode =: 'cap' NB. Neighbor mode: 'tor' for toroidal wrapping
	NB. or 'cap' for capping at the edges
	NB. =========================================================

	NB. load data from http://archive.ics.uci.edu/ml/datasets/Iris
	iris =: }: readcsv jpath '~temp/FisherIris.csv'
	NB. split data and labels in training and mapping data
	X =: ".&> 4 {."1 iris NB. 150x4 data
	X =: (% +/&.:*:)"1 X NB. Normalize data.
	classes =: ~. {:"1 iris NB. 3 classes
	Y =: classes i. {:"1 iris NB. 150 labels
	NB. Take 25 samples of each class.
	TrainId =: , (({~ntrain ? #)/. i.@#) Y
	Xt =: X #~ TrainId e.~ i.#X
	Xm =: X #~ -. TrainId e.~ i.#X
	Yt =: Y #~ TrainId e.~ i.#Y
	Ym =: Y #~ -. TrainId e.~ i.#Y
	NB. =========================================================

	NB. Utility functions
	center =: -"1 +/%# NB. subtract column means
	mp =: +/ .* NB. matrix product
	cov =: (mp"2 1)~&.\|: ] % <:^:(1&<)@# NB. Covariance (note, does NOT center)
	NB. Principle component analysis
	NB. x PCA y : x: number of principle components to return
	NB. y: data
	pca =: 4 : 0
	require 'math/lapack'
	require 'math/lapack/geev'
	c=. cov center y
	'vec val' =. 2b110 geev_jlapack_ c
	NB. column means
	cm =. (+/%#) y
	vec =. x {."1 vec
	val =. x {. val
	vec;(vec +/ .*~ y -"1 cm);((+/\%+/)val);cm
	)
	NB. Squared euclidean distance
	sqeuclidean =: +/@:*:@(-"1)
	NB. Gaussian kernel
	gauss =: +:@:@[ (^@-@(%~ +/&::@]) % o.@[) (,:\|:)@:(] #"0 i:@-:@<:@])
	NB. Normalizes a filter, such that the sum of all elements equals 1.
	normalize =: (% +/@,)

	NB. Neighbors (and self) of linear index y in array of size x
	neighbors =: ((, normalize gauss sizenei) ,. (,/,."0 1/~ i:<.-:sizenei) +"1 #:)
	NB. Weights and neighbors for eligible points (eg, in the map).
	wncap =: [ (] #~ [ ( >"1 ./"1@:. 0 0 <:"1 ]) }."1@]) neighbors NB. cap neighbors at edges
	wntor =: [ ({.@] , [ \| }.@])"1 neighbors NB. toroidal
	wn =: ('wn',neighmode)~ NB. fix chosen mode

	NB. Linear alpha between starting and stopping value
	linalpha =: 2 : '(m + ] * (n - m) % [)'
	NB. x alpha y : give value of alpha for x iterations in total, for iteration y
	alpha =: a0 linalpha an

	NB. Training verb:
	NB. niterations;[Neurons] train data
	NB. if Neurons are not given, they will be initialized
	train =: 4 :0
	'nit neur' =. 2$<^:(0=L.)y
	t=.? x $ #y NB. randomize input vectors
	if. neur -: nit do. NB. initial neurons not given, initialize below
	v =.0{:: 2 pca y NB. initialize weights as per 2 PCA's
	neur =. \|: v +/ . * _0.5 + ?(2 ,N,N)$ 0 NB. make linear combinations of first 2 PC's
	NB. neur =. ?(N, N , D) $0 NB. alternative: use random Neurons to start
	end.

	for_s. i.x do.
	samp =.(y{~t{~s)
	NB. find closest neighbor
	bmu =. {. samp (I.@:= <./)@:,@:(sqeuclidean) neur
	NB. find it's neighbors and associated weights
	'wei nei' =. ({."1 ; <@:(<@}."1)) (}: $neur) wn bmu
	NB. update neighboring neurons to move closer to the selected sample
	neur =. ((+ (x alpha s) * wei * (samp-"1])) nei{neur ) nei} neur
	end.
	neur
	)

	NB. Map inputs to neurons
	NB. Neurons map Xm;Ym
	map =: 4 : 0
	'dat lab' =. 2$<^:(0=L.)y
	if. dat -: lab do. NB. y had only one element.
	lab =. 0
	end.
	NB. find closest vector, put label in location.
	dists =. x sqeuclidean"1/ dat
	results =. (>:lab) ((N,N) <"1@([#: {.@I.@:(= <./)@:,@])"2 \|: dists)} (N,N)$0
	)
	NB. make a U-matrix, showing euclidean distances between neighboring neurons
	Umat =: \|:@:((1 1 0 ,:3 3 4)&((+/%#)@,@(sqeuclidean/~)@:(,"2) ;. _3))
	NB. =========================================================
	NB. Run the analysis
	Neurons =: niter train Xt
	viewmat (_2 + N,N) $, Umat Neurons
	viewmat ((Xt;Yt) (,. (>:#classes) ,. ])&(Neurons&map) (Xm;Ym)) ; 'test Vs. mapped'
	viewmat"2 \|: Neurons