Skip to content

Instantly share code, notes, and snippets.

@PtrMan
Last active February 19, 2021 14:17
Show Gist options
  • Save PtrMan/6107380 to your computer and use it in GitHub Desktop.
Save PtrMan/6107380 to your computer and use it in GitHub Desktop.
Python implementation of Art1
# code is translated from orginal implementation from
# http://77.93.202.96/~xhudik/art/
from random import randint
class Art1(object):
def _countOnes(self, Instance):
Count = 0
for Element in Instance:
if Element == 1.0:
Count += 1
return Count
def _countCommonOnes(self, Instance1, Instance2):
Count = 0
assert len(Instance1) == len(Instance2)
for i in range(len(Instance1)):
if Instance1[i] == 1.0 and Instance2[i] == 1.0:
Count += 1
return Count
"""
Add a instance (Ek) to a particular cluster (it will make the prototype more similar)
The prototype will be more similar with an instance.
"""
def _addInstanceBin(self, Instance, PrototypeI):
# '1' which are not in a Instance[iinst] (but they are in Prototype)
# are nullifying in the Prototype
for i in range(len(Instance)):
if Instance[i] == 0 and self.Prototypes[PrototypeI][i] == 1:
self.Prototypes[PrototypeI][i] = 0
"""
It returns the prototype with highest score (which was not used yet)
The score is counted for a current instance (Ek)
If it is returned empty prototype -- was not possible (for any reason) to find it.
\param Instance Ek
\param Prototypes list of prototypes
\param Used list of already tested prototypes
\param Beta ?
"""
def _bestPrototypeBin(self, Instance, Prototypes, Used, Beta):
# prototypes with the same score
sameScores = []
# if the number of already used prototypes and the number of
# prototypes are the same return empty prototype. (no best prototype.)
if len(Used) == len(Prototypes):
return []
Scores = [-1.0] * len(Prototypes)
# set score for every prototype
# TODO< rewrite i to pythonic code >
for i in range(len(Prototypes)):
# search if prototype is not among already used prototypes
UsedBool = False
for j in range(len(Used)):
if Prototypes[i] == Used[j]:
UsedBool = True
break
if UsedBool:
continue
Scores[i] = float(self._countCommonOnes(Prototypes[i], Instance)) / float(Beta + self._countOnes(Prototypes[i]))
# find prototype with highest score
Higher = -1.0
# TODO< rewrite to pythonic way >
for i in range(len(Prototypes)):
if Scores[i] == Higher:
sameScores.append(Prototypes[i])
else:
if Scores[i] > Higher:
# erase the old list
sameScores = []
sameScores.append(Prototypes[i])
Higher = Scores[i]
# the result is an empty prototype
if len(sameScores) == 0:
return []
# the result is the only one possible best prototype
if len(sameScores) == 1:
return sameScores[0]
# if there is more best prototypes with the same score -- random choosing
RandomIndex = randint(0, len(sameScores)-1)
return sameScores[RandomIndex]
# returns -1 if it was not found
def _instanceInSequence(self, PrototypesSequence, InstanceI):
PrototypeNumber = -1
for i in range(len(PrototypesSequence)):
# every prototype is compound of different number of instances
for j in range(len(PrototypesSequence[i])):
if PrototypesSequence[i][j] == InstanceI:
PrototypeNumber = i
break
if PrototypeNumber != -1:
break
return PrototypeNumber
"""
Removing an instance(Ek) from the particular prototype.
Remove an instance with index 'iinst' in 'sample' from prototype
with index 'iprot' in 'prot'. But also remove particular index
from prototype sequence.
"""
# clean
def _removeInstance(self, Sample, InstanceI, PrototypeI):
# find and erase in the prototype sequence the instance which should be deleted
for i in range(len(self.PrototypesSequence[PrototypeI])):
if self.PrototypesSequence[PrototypeI][i] == InstanceI:
del self.PrototypesSequence[PrototypeI][i]
break
# if the particular prototype is empty now - delete whole prototype
# delete also line (prototype) in prototype sequence
if len(self.PrototypesSequence[PrototypeI]) == 0:
del self.Prototypes[PrototypeI]
del self.PrototypesSequence[PrototypeI]
else:
# if it is not empty - re-create it from the rest instances
# build prototype but without instance which should be deleted
# at first -- prototype is the first item in the prototype sequence
self.Prototypes[PrototypeI] = Sample[self.PrototypesSequence[PrototypeI][0]]
# it started from 2nd member because the first is already in (1st sample = prototype
# continually add others samples
for i in range(len(self.PrototypesSequence[PrototypeI]))[1:]:
self._addInstanceBin(Sample[self.PrototypesSequence[PrototypeI][i]], PrototypeI)
"""
find a particular instance(example Ek) or prototype in a sequence
and give as a result the index of instance(prototype) in the sequence
item means instance or prototype.
If must_find is set up as true and the example (item) has not been found -- write error
message and stop the program.
It is used by ART 1, ART 2A and ART 2A-C algorithms
"""
def _findItem(self, Samples, Instance, MustFind):
index = -1 # not found
for i in range(len(Samples)):
if Samples[i] == Instance:
index = i
break
if not MustFind:
return index
else:
if index != -1:
return index
else:
"""ERROR "\n\tfind_item(): Error: sample '" Instance "' was not find in sequence.\n"""
pass
"""
It create a cluster by creating of new prototype. And it also create a new sequence in prot_seq
One line in prot_seq = one cluster represented by a prototype
"""
def _createCluster(self, Instance, InstanceI):
# check if the instance is not presented in some other prototype
# if the instance is already included in some prototype - recreate the prototype
# and exclude this instance
prot_number = self._instanceInSequence(self.PrototypesSequence, InstanceI)
if prot_number != -1:
self._removeInstance(Instance, InstanceI, prot_number)
# create a new prototype
self.Prototypes.append(Instance[InstanceI])
# create a new prototype sequence and insert the first index of instance
self.PrototypesSequence.append( [InstanceI] )
# Samples : is the input
def doArt(self, Samples, Parameters):
# list of all prototypes
self.Prototypes = []
# the best set of prototypes in all passes
PrototypesBest = []
# sequences of samples Ek from which prototype has been created
# it is possible to reconstruct a prototype from the sequence
self.PrototypesSequence = []
# the best sequence of prototypes of the whole history
PrototypesSequenceBest = []
# list of prototypes which were used already
Used = []
# this is used for counting how many instances were transformed
# into another prototype (in %)
Fluctuation = 100.0
# the lowest error of the whole history
# it is initialized as some impossible number (higher than 100% can't be), to avoid problems with first iteration
FluctuationBest = 120.0
# how many times it run throughout the samples
Pass = 0
while Pass < Parameters.PassNumber and Fluctuation > Parameters.Error:
Changed = [False] * len(Samples)
for SampleI in range(len(Samples)):
# zeroing 'used' prototypes
Used = []
while True:
# TODO< rename P >
# find the best prototype for current Ek
P = self._bestPrototypeBin(Samples[SampleI], self.Prototypes, Used, Parameters.Beta)
# if there is no best prototype (list of prototypes is empty,
# or number of used and number of prototypes are the same
if len(P) == 0:
# check if the instance is not included already in some other prototype
PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
if PrototypeIndex != -1:
# if so, remove it (recreate prototype--without the instance)
self._removeInstance(Samples, SampleI, PrototypeIndex)
self._createCluster(Samples, SampleI)
Changed[SampleI] = True
break
Used.append(P)
# measure1 = (how many 1 have P and sample[SampleI](Ek) in common) / (beta+how many 1 has P)
# to have 1's in common = have 1's on the same positions
Measure1 = float(self._countCommonOnes(P, Samples[SampleI])) / (Parameters.Beta+float(self._countOnes(P)))
# measure2 = (how many 1 has Ek) / (beta + number of dimensions )
Measure2 = float(self._countOnes(Samples[SampleI])) / (Parameters.Beta + float(len(Samples[SampleI])))
# if P and sample[SampleI] are similar enough
if Measure1 >= Measure2:
# similarity = (how many 1 have Ek and P in common) / (how many 1's has Ek)
# to have 1's in common = have 1's on the same positions
Similarity = 0.0
if self._countOnes(Samples[SampleI]) > 0:
Similarity = float(self._countCommonOnes(P, Samples[SampleI])) / float(self._countOnes(Samples[SampleI]))
# if the similarity is sufficient -- than sample[SampleI] going to be a member of P
if Similarity >= Parameters.Vigilance:
# if the instance is already included in some prototype -- find it
PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
if PrototypeIndex != -1:
# test if founded prototype is not actual one in that case go for another Ek
if self.Prototypes[PrototypeIndex] == P:
break
else:
# re-build prototype - without the sample (Ek)
self._removeInstance(Sample, SampleI, PrototypeIndex)
# index of P(current prototype) in prototypes
PrototypeIndex = self._findItem(self.Prototypes, P, True)
# add instance to the current prototype
self._addInstanceBin(Samples[SampleI], PrototypeIndex)
self.PrototypesSequence[PrototypeIndex].append(SampleI)
Changed[SampleI] = True
break
else:
# try other best P
continue
else:
# if prototype is not enough similar to instance(sample[SampleI]) then create a new prototype
# check if the instance is not already in some other prototype
PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
if PrototypeIndex != -1:
# if so, remove it (recreate prototype--without the instance)
self._removeInstance(Samples, SampleI, PrototypeIndex)
self._createCluster(Samples, SampleI)
Changed[SampleI] = True
break
# WHILE
if len(self.Prototypes) != len(Samples):
break
# count statistics for this pass
number_changed=0;
for ChangedItem in Changed:
if ChangedItem:
number_changed += 1
Fluctuation = (float(number_changed)/len(Samples))*100.0
Pass += 1
print("Pass: " + str(Pass) + ", Fluctuation: " + str(Fluctuation) + "%" + ", clusters: " + str(len(self.Prototypes)))
# test if this iteration has not lower error
if Fluctuation < FluctuationBest:
PrototypesBest = self.Prototypes
PrototypesSequenceBest = self.PrototypesSequence
FluctuationBest = Fluctuation
# TODO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment