Last active
February 19, 2021 14:17
-
-
Save PtrMan/6107380 to your computer and use it in GitHub Desktop.
Python implementation of Art1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# code is translated from orginal implementation from | |
# http://77.93.202.96/~xhudik/art/ | |
from random import randint | |
class Art1(object): | |
def _countOnes(self, Instance): | |
Count = 0 | |
for Element in Instance: | |
if Element == 1.0: | |
Count += 1 | |
return Count | |
def _countCommonOnes(self, Instance1, Instance2): | |
Count = 0 | |
assert len(Instance1) == len(Instance2) | |
for i in range(len(Instance1)): | |
if Instance1[i] == 1.0 and Instance2[i] == 1.0: | |
Count += 1 | |
return Count | |
""" | |
Add a instance (Ek) to a particular cluster (it will make the prototype more similar) | |
The prototype will be more similar with an instance. | |
""" | |
def _addInstanceBin(self, Instance, PrototypeI): | |
# '1' which are not in a Instance[iinst] (but they are in Prototype) | |
# are nullifying in the Prototype | |
for i in range(len(Instance)): | |
if Instance[i] == 0 and self.Prototypes[PrototypeI][i] == 1: | |
self.Prototypes[PrototypeI][i] = 0 | |
""" | |
It returns the prototype with highest score (which was not used yet) | |
The score is counted for a current instance (Ek) | |
If it is returned empty prototype -- was not possible (for any reason) to find it. | |
\param Instance Ek | |
\param Prototypes list of prototypes | |
\param Used list of already tested prototypes | |
\param Beta ? | |
""" | |
def _bestPrototypeBin(self, Instance, Prototypes, Used, Beta): | |
# prototypes with the same score | |
sameScores = [] | |
# if the number of already used prototypes and the number of | |
# prototypes are the same return empty prototype. (no best prototype.) | |
if len(Used) == len(Prototypes): | |
return [] | |
Scores = [-1.0] * len(Prototypes) | |
# set score for every prototype | |
# TODO< rewrite i to pythonic code > | |
for i in range(len(Prototypes)): | |
# search if prototype is not among already used prototypes | |
UsedBool = False | |
for j in range(len(Used)): | |
if Prototypes[i] == Used[j]: | |
UsedBool = True | |
break | |
if UsedBool: | |
continue | |
Scores[i] = float(self._countCommonOnes(Prototypes[i], Instance)) / float(Beta + self._countOnes(Prototypes[i])) | |
# find prototype with highest score | |
Higher = -1.0 | |
# TODO< rewrite to pythonic way > | |
for i in range(len(Prototypes)): | |
if Scores[i] == Higher: | |
sameScores.append(Prototypes[i]) | |
else: | |
if Scores[i] > Higher: | |
# erase the old list | |
sameScores = [] | |
sameScores.append(Prototypes[i]) | |
Higher = Scores[i] | |
# the result is an empty prototype | |
if len(sameScores) == 0: | |
return [] | |
# the result is the only one possible best prototype | |
if len(sameScores) == 1: | |
return sameScores[0] | |
# if there is more best prototypes with the same score -- random choosing | |
RandomIndex = randint(0, len(sameScores)-1) | |
return sameScores[RandomIndex] | |
# returns -1 if it was not found | |
def _instanceInSequence(self, PrototypesSequence, InstanceI): | |
PrototypeNumber = -1 | |
for i in range(len(PrototypesSequence)): | |
# every prototype is compound of different number of instances | |
for j in range(len(PrototypesSequence[i])): | |
if PrototypesSequence[i][j] == InstanceI: | |
PrototypeNumber = i | |
break | |
if PrototypeNumber != -1: | |
break | |
return PrototypeNumber | |
""" | |
Removing an instance(Ek) from the particular prototype. | |
Remove an instance with index 'iinst' in 'sample' from prototype | |
with index 'iprot' in 'prot'. But also remove particular index | |
from prototype sequence. | |
""" | |
# clean | |
def _removeInstance(self, Sample, InstanceI, PrototypeI): | |
# find and erase in the prototype sequence the instance which should be deleted | |
for i in range(len(self.PrototypesSequence[PrototypeI])): | |
if self.PrototypesSequence[PrototypeI][i] == InstanceI: | |
del self.PrototypesSequence[PrototypeI][i] | |
break | |
# if the particular prototype is empty now - delete whole prototype | |
# delete also line (prototype) in prototype sequence | |
if len(self.PrototypesSequence[PrototypeI]) == 0: | |
del self.Prototypes[PrototypeI] | |
del self.PrototypesSequence[PrototypeI] | |
else: | |
# if it is not empty - re-create it from the rest instances | |
# build prototype but without instance which should be deleted | |
# at first -- prototype is the first item in the prototype sequence | |
self.Prototypes[PrototypeI] = Sample[self.PrototypesSequence[PrototypeI][0]] | |
# it started from 2nd member because the first is already in (1st sample = prototype | |
# continually add others samples | |
for i in range(len(self.PrototypesSequence[PrototypeI]))[1:]: | |
self._addInstanceBin(Sample[self.PrototypesSequence[PrototypeI][i]], PrototypeI) | |
""" | |
find a particular instance(example Ek) or prototype in a sequence | |
and give as a result the index of instance(prototype) in the sequence | |
item means instance or prototype. | |
If must_find is set up as true and the example (item) has not been found -- write error | |
message and stop the program. | |
It is used by ART 1, ART 2A and ART 2A-C algorithms | |
""" | |
def _findItem(self, Samples, Instance, MustFind): | |
index = -1 # not found | |
for i in range(len(Samples)): | |
if Samples[i] == Instance: | |
index = i | |
break | |
if not MustFind: | |
return index | |
else: | |
if index != -1: | |
return index | |
else: | |
"""ERROR "\n\tfind_item(): Error: sample '" Instance "' was not find in sequence.\n""" | |
pass | |
""" | |
It create a cluster by creating of new prototype. And it also create a new sequence in prot_seq | |
One line in prot_seq = one cluster represented by a prototype | |
""" | |
def _createCluster(self, Instance, InstanceI): | |
# check if the instance is not presented in some other prototype | |
# if the instance is already included in some prototype - recreate the prototype | |
# and exclude this instance | |
prot_number = self._instanceInSequence(self.PrototypesSequence, InstanceI) | |
if prot_number != -1: | |
self._removeInstance(Instance, InstanceI, prot_number) | |
# create a new prototype | |
self.Prototypes.append(Instance[InstanceI]) | |
# create a new prototype sequence and insert the first index of instance | |
self.PrototypesSequence.append( [InstanceI] ) | |
# Samples : is the input | |
def doArt(self, Samples, Parameters): | |
# list of all prototypes | |
self.Prototypes = [] | |
# the best set of prototypes in all passes | |
PrototypesBest = [] | |
# sequences of samples Ek from which prototype has been created | |
# it is possible to reconstruct a prototype from the sequence | |
self.PrototypesSequence = [] | |
# the best sequence of prototypes of the whole history | |
PrototypesSequenceBest = [] | |
# list of prototypes which were used already | |
Used = [] | |
# this is used for counting how many instances were transformed | |
# into another prototype (in %) | |
Fluctuation = 100.0 | |
# the lowest error of the whole history | |
# it is initialized as some impossible number (higher than 100% can't be), to avoid problems with first iteration | |
FluctuationBest = 120.0 | |
# how many times it run throughout the samples | |
Pass = 0 | |
while Pass < Parameters.PassNumber and Fluctuation > Parameters.Error: | |
Changed = [False] * len(Samples) | |
for SampleI in range(len(Samples)): | |
# zeroing 'used' prototypes | |
Used = [] | |
while True: | |
# TODO< rename P > | |
# find the best prototype for current Ek | |
P = self._bestPrototypeBin(Samples[SampleI], self.Prototypes, Used, Parameters.Beta) | |
# if there is no best prototype (list of prototypes is empty, | |
# or number of used and number of prototypes are the same | |
if len(P) == 0: | |
# check if the instance is not included already in some other prototype | |
PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI) | |
if PrototypeIndex != -1: | |
# if so, remove it (recreate prototype--without the instance) | |
self._removeInstance(Samples, SampleI, PrototypeIndex) | |
self._createCluster(Samples, SampleI) | |
Changed[SampleI] = True | |
break | |
Used.append(P) | |
# measure1 = (how many 1 have P and sample[SampleI](Ek) in common) / (beta+how many 1 has P) | |
# to have 1's in common = have 1's on the same positions | |
Measure1 = float(self._countCommonOnes(P, Samples[SampleI])) / (Parameters.Beta+float(self._countOnes(P))) | |
# measure2 = (how many 1 has Ek) / (beta + number of dimensions ) | |
Measure2 = float(self._countOnes(Samples[SampleI])) / (Parameters.Beta + float(len(Samples[SampleI]))) | |
# if P and sample[SampleI] are similar enough | |
if Measure1 >= Measure2: | |
# similarity = (how many 1 have Ek and P in common) / (how many 1's has Ek) | |
# to have 1's in common = have 1's on the same positions | |
Similarity = 0.0 | |
if self._countOnes(Samples[SampleI]) > 0: | |
Similarity = float(self._countCommonOnes(P, Samples[SampleI])) / float(self._countOnes(Samples[SampleI])) | |
# if the similarity is sufficient -- than sample[SampleI] going to be a member of P | |
if Similarity >= Parameters.Vigilance: | |
# if the instance is already included in some prototype -- find it | |
PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI) | |
if PrototypeIndex != -1: | |
# test if founded prototype is not actual one in that case go for another Ek | |
if self.Prototypes[PrototypeIndex] == P: | |
break | |
else: | |
# re-build prototype - without the sample (Ek) | |
self._removeInstance(Sample, SampleI, PrototypeIndex) | |
# index of P(current prototype) in prototypes | |
PrototypeIndex = self._findItem(self.Prototypes, P, True) | |
# add instance to the current prototype | |
self._addInstanceBin(Samples[SampleI], PrototypeIndex) | |
self.PrototypesSequence[PrototypeIndex].append(SampleI) | |
Changed[SampleI] = True | |
break | |
else: | |
# try other best P | |
continue | |
else: | |
# if prototype is not enough similar to instance(sample[SampleI]) then create a new prototype | |
# check if the instance is not already in some other prototype | |
PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI) | |
if PrototypeIndex != -1: | |
# if so, remove it (recreate prototype--without the instance) | |
self._removeInstance(Samples, SampleI, PrototypeIndex) | |
self._createCluster(Samples, SampleI) | |
Changed[SampleI] = True | |
break | |
# WHILE | |
if len(self.Prototypes) != len(Samples): | |
break | |
# count statistics for this pass | |
number_changed=0; | |
for ChangedItem in Changed: | |
if ChangedItem: | |
number_changed += 1 | |
Fluctuation = (float(number_changed)/len(Samples))*100.0 | |
Pass += 1 | |
print("Pass: " + str(Pass) + ", Fluctuation: " + str(Fluctuation) + "%" + ", clusters: " + str(len(self.Prototypes))) | |
# test if this iteration has not lower error | |
if Fluctuation < FluctuationBest: | |
PrototypesBest = self.Prototypes | |
PrototypesSequenceBest = self.PrototypesSequence | |
FluctuationBest = Fluctuation | |
# TODO |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment