Skip to content

Instantly share code, notes, and snippets.

@opatut
Created June 5, 2014 09:29
Show Gist options
  • Save opatut/2f56fc1d61dc96c31105 to your computer and use it in GitHub Desktop.
Save opatut/2f56fc1d61dc96c31105 to your computer and use it in GitHub Desktop.
# Data mining 2014, practical tutorial 5, example for
# *** Self-Organising Map ***
# Copyright agreement:
# Do whatever you want.
import numpy, scipy, sys, subprocess
iterations = int(sys.argv[2])
# set parameters
dim_a, dim_b = 44, 1 # map sizes
sigma = numpy.max([float(dim_a),float(dim_b)]) # initial map interaction range
epsilon = 0.03 # learning step size
# load data
datafile = sys.argv[1] if len(sys.argv) > 1 else "triangle.dat"
print "Using file", datafile
data = numpy.loadtxt(datafile,delimiter=' ')
dim_in = numpy.shape(data)[1]
num_data = numpy.shape(data)[0]
# create architecture
size_map = dim_a * dim_b
W = numpy.zeros((size_map, dim_in))
map_net = numpy.zeros(size_map)
map_act = numpy.zeros(size_map)
# initialize weights
for j in range(dim_in):
for k in range(size_map):
W[k,j] += numpy.random.uniform(numpy.min(data[:,j]),numpy.max(data[:,j]))
# for small data sets, repeat all-over again
for batch in range(iterations):
# iterate (one data point per iteration)
for iter in range(num_data):
# current data point
x = data[iter]
# neurons' inner activation
for k in range(size_map):
map_net[k] = numpy.dot(W[k]-x,W[k]-x)
# find winner
winner = scipy.argmin(map_net)
# activation with map interaction function
for k in range(size_map):
dist = numpy.sqrt((k/dim_b-winner/dim_b)**2 + (k%dim_b-winner%dim_b)**2)
map_act[k] = numpy.exp(-0.5 * dist**2 / sigma**2)
# learning step
for k in range(size_map):
W[k] += epsilon * map_act[k] * (x-W[k])
# reduce map interaction range
sigma *= 0.999
# occasionally give some feedback
if iter % 1000 == 0:
print "sigma=", sigma, "winner=", winner, "x=", x
# SOM finished - now visualise
# write weights as 2D- or 3D-point coordinates for display with gnuplot
f = open("out.dat", 'wb')
for i in range(dim_a):
for k in range(dim_b):
for j in range(dim_in):
val_ch = str(W[i*dim_b+k,j]) + " "
f.write(val_ch)
f.write("\n")
f.write("\n")
f.close()
# write gnuplot commands to draw grid
f = open("out.gnu", 'wb')
for i in range(dim_a):
for k in range(dim_b-1):
val_string = "set arrow from "
for j in range(dim_in):
val_string += str(W[i*dim_b+k,j])
if j < dim_in - 1:
val_string += ", "
val_string += " to "
for j in range(dim_in):
val_string += str(W[i*dim_b+k+1,j])
if j < dim_in - 1:
val_string += ", "
val_string += " nohead\n"
f.write(val_string)
for k in range(dim_b):
for i in range(dim_a-1):
val_string = "set arrow from "
for j in range(dim_in):
val_string += str(W[i*dim_b+k,j])
if j < dim_in - 1:
val_string += ", "
val_string += " to "
for j in range(dim_in):
val_string += str(W[(i+1)*dim_b+k,j])
if j < dim_in - 1:
val_string += ", "
val_string += " nohead\n"
f.write(val_string)
val_string = "set arrow from "
#f.write("replot\n")
f.close()
if "--plot" in sys.argv:
subprocess.call(["gnuplot",
"-e", "set term pngcairo",
"-e", 'set output "out/iteration-%04d.png"'%batch,
"-e", 'load "out.gnu"',
"-e", 'plot "%s" with dots, "out.dat" with points'%datafile
])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment