PtrMan/Art1.py

## Art1.py
# code is translated from orginal implementation from
# http://77.93.202.96/~xhudik/art/

from random import randint

class Art1(object):
   def _countOnes(self, Instance):
      Count = 0

      for Element in Instance:
         if Element == 1.0:
            Count += 1

      return Count

   def _countCommonOnes(self, Instance1, Instance2):
      Count = 0

      assert len(Instance1) == len(Instance2)

      for i in range(len(Instance1)):
         if Instance1[i] == 1.0 and Instance2[i] == 1.0:
            Count += 1

      return Count

   """
   Add a instance (Ek) to a particular cluster (it will make the prototype more similar)
   The prototype will be more similar with an instance.
   """
   def _addInstanceBin(self, Instance, PrototypeI):

      # '1' which are not in a Instance[iinst] (but they are in Prototype)
      # are nullifying in the Prototype
      for i in range(len(Instance)):
         if Instance[i] == 0 and self.Prototypes[PrototypeI][i] == 1:
            self.Prototypes[PrototypeI][i] = 0

   """
   It returns the prototype with highest score (which was not used yet)
   The score is counted for a current instance (Ek)
   If it is returned empty prototype -- was not possible (for any reason) to find it.
   \param Instance Ek
   \param Prototypes list of prototypes
   \param Used list of already tested prototypes
   \param Beta ?
   """
   def _bestPrototypeBin(self, Instance, Prototypes, Used, Beta):
      # prototypes with the same score
      sameScores = []

      # if the number of already used prototypes and the number of
      # prototypes are the same return empty prototype. (no best prototype.)
      if len(Used) == len(Prototypes):
         return []

      Scores = [-1.0] * len(Prototypes)

      # set score for every prototype

      # TODO< rewrite i to pythonic code >
      for i in range(len(Prototypes)):
         # search if prototype is not among already used prototypes
         UsedBool = False

         for j in range(len(Used)):
            if Prototypes[i] == Used[j]:
               UsedBool = True
               break

         if UsedBool:
            continue

         Scores[i] = float(self._countCommonOnes(Prototypes[i], Instance)) / float(Beta + self._countOnes(Prototypes[i]))

      # find prototype with highest score
      Higher = -1.0

      # TODO< rewrite to pythonic way >
      for i in range(len(Prototypes)):
         if Scores[i] == Higher:
            sameScores.append(Prototypes[i])
         else:
            if Scores[i] > Higher:
               # erase the old list
               sameScores = []
               sameScores.append(Prototypes[i])
               Higher = Scores[i]

      # the result is an empty prototype
      if len(sameScores) == 0:
         return []

      # the result is the only one possible best prototype
      if len(sameScores) == 1:
         return sameScores[0]

      # if there is more best prototypes with the same score -- random choosing
      RandomIndex = randint(0, len(sameScores)-1)
      return sameScores[RandomIndex]

   # returns -1 if it was not found
   def _instanceInSequence(self, PrototypesSequence, InstanceI):
      PrototypeNumber = -1

      for i in range(len(PrototypesSequence)):
         # every prototype is compound of different number of instances
         for j in range(len(PrototypesSequence[i])):
            if PrototypesSequence[i][j] == InstanceI:
               PrototypeNumber = i
               break

         if PrototypeNumber != -1:
            break

      return PrototypeNumber

   """
   Removing an instance(Ek) from the particular prototype.
   Remove an instance with index 'iinst' in 'sample' from prototype
   with index 'iprot' in 'prot'. But also remove particular index
   from prototype sequence.
   """
   # clean
   def _removeInstance(self, Sample, InstanceI, PrototypeI):
      # find and erase in the prototype sequence the instance which should be deleted
      for i in range(len(self.PrototypesSequence[PrototypeI])):
         if self.PrototypesSequence[PrototypeI][i] == InstanceI:
            del self.PrototypesSequence[PrototypeI][i]
            break

      # if the particular prototype is empty now - delete whole prototype
      # delete also line (prototype) in prototype sequence
      if len(self.PrototypesSequence[PrototypeI]) == 0:
         del self.Prototypes[PrototypeI]
         del self.PrototypesSequence[PrototypeI]
      else:
         # if it is not empty - re-create it from the rest instances

         # build prototype but without instance which should be deleted
         # at first -- prototype is the first item in the prototype sequence
         self.Prototypes[PrototypeI] = Sample[self.PrototypesSequence[PrototypeI][0]]
         # it started from 2nd member because the first is already in (1st sample = prototype
         # continually add others samples
         for i in range(len(self.PrototypesSequence[PrototypeI]))[1:]:
            self._addInstanceBin(Sample[self.PrototypesSequence[PrototypeI][i]], PrototypeI)


   """
   find a particular instance(example Ek) or prototype in a sequence
   and give as a result the index of instance(prototype) in the sequence
   item means instance or prototype.
   If must_find is set up as true and the example (item) has not been found -- write error
   message and stop the program.
   It is used by ART 1, ART 2A and ART 2A-C algorithms
   """
   def _findItem(self, Samples, Instance, MustFind):
      index = -1 # not found

      for i in range(len(Samples)):
         if Samples[i] == Instance:
            index = i
            break

      if not MustFind:
         return index
      else:
         if index != -1:
            return index
         else:
            """ERROR  "\n\tfind_item(): Error: sample '"  Instance  "' was not find in sequence.\n"""
            pass


   """
   It create a cluster by creating of new prototype. And it also create a new sequence in prot_seq
   One line in prot_seq = one cluster represented by a prototype
   """
   def _createCluster(self, Instance, InstanceI):
      # check if the instance is not presented in some other prototype
      # if the instance is already included in some prototype - recreate the prototype
      # and exclude this instance
      prot_number = self._instanceInSequence(self.PrototypesSequence, InstanceI)

      if prot_number != -1:
         self._removeInstance(Instance, InstanceI, prot_number)

      # create a new prototype
      self.Prototypes.append(Instance[InstanceI])
      # create a new prototype sequence and insert the first index of instance
      self.PrototypesSequence.append( [InstanceI] )

   # Samples : is the input
   def doArt(self, Samples, Parameters):
      # list of all prototypes
      self.Prototypes = []

      # the best set of prototypes in all passes
      PrototypesBest = []

      # sequences of samples Ek from which prototype has been created
      # it is possible to reconstruct a prototype from the sequence
      self.PrototypesSequence = []

      # the best sequence of prototypes of the whole history
      PrototypesSequenceBest = []

      # list of prototypes which were used already
      Used = []

      # this is used for counting how many instances were transformed
      # into another prototype (in %)
      Fluctuation = 100.0

      # the lowest error of the whole history
      # it is initialized as some impossible number (higher than 100% can't be), to avoid problems with first iteration
      FluctuationBest = 120.0

      # how many times it run throughout the samples
      Pass = 0

      while Pass < Parameters.PassNumber and Fluctuation > Parameters.Error:
         Changed = [False] * len(Samples)

         for SampleI in range(len(Samples)):
            # zeroing 'used' prototypes
            Used = []

            while True:
               # TODO< rename P >

               # find the best prototype for current Ek
               P = self._bestPrototypeBin(Samples[SampleI], self.Prototypes, Used, Parameters.Beta)

               # if there is no best prototype (list of prototypes is empty,
               # or number of used and number of prototypes are the same
               if len(P) == 0:
                  # check if the instance is not included already in some other prototype
                  PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
                  if PrototypeIndex != -1:
                     # if so, remove it (recreate prototype--without the instance)
                     self._removeInstance(Samples, SampleI, PrototypeIndex)


                  self._createCluster(Samples, SampleI)
                  Changed[SampleI] = True
                  break

               Used.append(P)

               # measure1 = (how many 1 have P and sample[SampleI](Ek) in common) / (beta+how many 1 has P)
               # to have 1's in common = have 1's on the same positions
               Measure1 = float(self._countCommonOnes(P, Samples[SampleI])) / (Parameters.Beta+float(self._countOnes(P)))

               # measure2 = (how many 1 has Ek) / (beta + number of dimensions )
               Measure2 = float(self._countOnes(Samples[SampleI])) / (Parameters.Beta + float(len(Samples[SampleI])))

               # if P and sample[SampleI] are similar enough
               if Measure1 >= Measure2:
                  # similarity = (how many 1 have Ek and P in common) / (how many 1's has Ek)
                  # to have 1's in common = have 1's on the same positions
                  Similarity = 0.0

                  if self._countOnes(Samples[SampleI]) > 0:
                     Similarity = float(self._countCommonOnes(P, Samples[SampleI])) / float(self._countOnes(Samples[SampleI]))

                  # if the similarity is sufficient  -- than sample[SampleI] going to be a member of P
                  if Similarity >= Parameters.Vigilance:
                     # if the instance is already included in some  prototype -- find it

                     PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
                     if PrototypeIndex != -1:
                        # test if founded prototype is not actual one in that case go for another Ek
                        if self.Prototypes[PrototypeIndex] == P:
                           break
                        else:
                           # re-build prototype - without the sample (Ek)
                           self._removeInstance(Sample, SampleI, PrototypeIndex)

                     # index of P(current prototype) in prototypes
                     PrototypeIndex = self._findItem(self.Prototypes, P, True)

                     # add instance to the current prototype
                     self._addInstanceBin(Samples[SampleI], PrototypeIndex)
                     self.PrototypesSequence[PrototypeIndex].append(SampleI)
                     Changed[SampleI] = True
                     break
                  else:
                     # try other best P
                     continue
               else:
                  # if prototype is not enough similar to instance(sample[SampleI]) then create a new prototype

                  # check if the instance is not already in some other prototype
                  PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
                  if PrototypeIndex != -1:
                     # if so, remove it (recreate prototype--without the instance)
                     self._removeInstance(Samples, SampleI, PrototypeIndex)

                  self._createCluster(Samples, SampleI)
                  Changed[SampleI] = True
                  break

               # WHILE
               if len(self.Prototypes) != len(Samples):
                  break

         # count statistics for this pass
         number_changed=0;
         for ChangedItem in Changed:
            if ChangedItem:
               number_changed += 1

         Fluctuation = (float(number_changed)/len(Samples))*100.0
         Pass += 1
         print("Pass: " + str(Pass) + ", Fluctuation: " + str(Fluctuation) + "%" + ", clusters: " + str(len(self.Prototypes)))

         # test if this iteration has not lower error
         if Fluctuation < FluctuationBest:
            PrototypesBest = self.Prototypes
            PrototypesSequenceBest = self.PrototypesSequence
            FluctuationBest = Fluctuation

      # TODO
	# code is translated from orginal implementation from
	# http://77.93.202.96/~xhudik/art/

	from random import randint

	class Art1(object):
	def _countOnes(self, Instance):
	Count = 0

	for Element in Instance:
	if Element == 1.0:
	Count += 1

	return Count

	def _countCommonOnes(self, Instance1, Instance2):
	Count = 0

	assert len(Instance1) == len(Instance2)

	for i in range(len(Instance1)):
	if Instance1[i] == 1.0 and Instance2[i] == 1.0:
	Count += 1

	return Count

	"""
	Add a instance (Ek) to a particular cluster (it will make the prototype more similar)
	The prototype will be more similar with an instance.
	"""
	def _addInstanceBin(self, Instance, PrototypeI):

	# '1' which are not in a Instance[iinst] (but they are in Prototype)
	# are nullifying in the Prototype
	for i in range(len(Instance)):
	if Instance[i] == 0 and self.Prototypes[PrototypeI][i] == 1:
	self.Prototypes[PrototypeI][i] = 0

	"""
	It returns the prototype with highest score (which was not used yet)
	The score is counted for a current instance (Ek)
	If it is returned empty prototype -- was not possible (for any reason) to find it.
	\param Instance Ek
	\param Prototypes list of prototypes
	\param Used list of already tested prototypes
	\param Beta ?
	"""
	def _bestPrototypeBin(self, Instance, Prototypes, Used, Beta):
	# prototypes with the same score
	sameScores = []

	# if the number of already used prototypes and the number of
	# prototypes are the same return empty prototype. (no best prototype.)
	if len(Used) == len(Prototypes):
	return []

	Scores = [-1.0] * len(Prototypes)

	# set score for every prototype

	# TODO< rewrite i to pythonic code >
	for i in range(len(Prototypes)):
	# search if prototype is not among already used prototypes
	UsedBool = False

	for j in range(len(Used)):
	if Prototypes[i] == Used[j]:
	UsedBool = True
	break

	if UsedBool:
	continue

	Scores[i] = float(self._countCommonOnes(Prototypes[i], Instance)) / float(Beta + self._countOnes(Prototypes[i]))

	# find prototype with highest score
	Higher = -1.0

	# TODO< rewrite to pythonic way >
	for i in range(len(Prototypes)):
	if Scores[i] == Higher:
	sameScores.append(Prototypes[i])
	else:
	if Scores[i] > Higher:
	# erase the old list
	sameScores = []
	sameScores.append(Prototypes[i])
	Higher = Scores[i]

	# the result is an empty prototype
	if len(sameScores) == 0:
	return []

	# the result is the only one possible best prototype
	if len(sameScores) == 1:
	return sameScores[0]

	# if there is more best prototypes with the same score -- random choosing
	RandomIndex = randint(0, len(sameScores)-1)
	return sameScores[RandomIndex]

	# returns -1 if it was not found
	def _instanceInSequence(self, PrototypesSequence, InstanceI):
	PrototypeNumber = -1

	for i in range(len(PrototypesSequence)):
	# every prototype is compound of different number of instances
	for j in range(len(PrototypesSequence[i])):
	if PrototypesSequence[i][j] == InstanceI:
	PrototypeNumber = i
	break

	if PrototypeNumber != -1:
	break

	return PrototypeNumber

	"""
	Removing an instance(Ek) from the particular prototype.
	Remove an instance with index 'iinst' in 'sample' from prototype
	with index 'iprot' in 'prot'. But also remove particular index
	from prototype sequence.
	"""
	# clean
	def _removeInstance(self, Sample, InstanceI, PrototypeI):
	# find and erase in the prototype sequence the instance which should be deleted
	for i in range(len(self.PrototypesSequence[PrototypeI])):
	if self.PrototypesSequence[PrototypeI][i] == InstanceI:
	del self.PrototypesSequence[PrototypeI][i]
	break

	# if the particular prototype is empty now - delete whole prototype
	# delete also line (prototype) in prototype sequence
	if len(self.PrototypesSequence[PrototypeI]) == 0:
	del self.Prototypes[PrototypeI]
	del self.PrototypesSequence[PrototypeI]
	else:
	# if it is not empty - re-create it from the rest instances

	# build prototype but without instance which should be deleted
	# at first -- prototype is the first item in the prototype sequence
	self.Prototypes[PrototypeI] = Sample[self.PrototypesSequence[PrototypeI][0]]
	# it started from 2nd member because the first is already in (1st sample = prototype
	# continually add others samples
	for i in range(len(self.PrototypesSequence[PrototypeI]))[1:]:
	self._addInstanceBin(Sample[self.PrototypesSequence[PrototypeI][i]], PrototypeI)


	"""
	find a particular instance(example Ek) or prototype in a sequence
	and give as a result the index of instance(prototype) in the sequence
	item means instance or prototype.
	If must_find is set up as true and the example (item) has not been found -- write error
	message and stop the program.
	It is used by ART 1, ART 2A and ART 2A-C algorithms
	"""
	def _findItem(self, Samples, Instance, MustFind):
	index = -1 # not found

	for i in range(len(Samples)):
	if Samples[i] == Instance:
	index = i
	break

	if not MustFind:
	return index
	else:
	if index != -1:
	return index
	else:
	"""ERROR "\n\tfind_item(): Error: sample '" Instance "' was not find in sequence.\n"""
	pass


	"""
	It create a cluster by creating of new prototype. And it also create a new sequence in prot_seq
	One line in prot_seq = one cluster represented by a prototype
	"""
	def _createCluster(self, Instance, InstanceI):
	# check if the instance is not presented in some other prototype
	# if the instance is already included in some prototype - recreate the prototype
	# and exclude this instance
	prot_number = self._instanceInSequence(self.PrototypesSequence, InstanceI)

	if prot_number != -1:
	self._removeInstance(Instance, InstanceI, prot_number)

	# create a new prototype
	self.Prototypes.append(Instance[InstanceI])
	# create a new prototype sequence and insert the first index of instance
	self.PrototypesSequence.append( [InstanceI] )

	# Samples : is the input
	def doArt(self, Samples, Parameters):
	# list of all prototypes
	self.Prototypes = []

	# the best set of prototypes in all passes
	PrototypesBest = []

	# sequences of samples Ek from which prototype has been created
	# it is possible to reconstruct a prototype from the sequence
	self.PrototypesSequence = []

	# the best sequence of prototypes of the whole history
	PrototypesSequenceBest = []

	# list of prototypes which were used already
	Used = []

	# this is used for counting how many instances were transformed
	# into another prototype (in %)
	Fluctuation = 100.0

	# the lowest error of the whole history
	# it is initialized as some impossible number (higher than 100% can't be), to avoid problems with first iteration
	FluctuationBest = 120.0

	# how many times it run throughout the samples
	Pass = 0

	while Pass < Parameters.PassNumber and Fluctuation > Parameters.Error:
	Changed = [False] * len(Samples)

	for SampleI in range(len(Samples)):
	# zeroing 'used' prototypes
	Used = []

	while True:
	# TODO< rename P >

	# find the best prototype for current Ek
	P = self._bestPrototypeBin(Samples[SampleI], self.Prototypes, Used, Parameters.Beta)

	# if there is no best prototype (list of prototypes is empty,
	# or number of used and number of prototypes are the same
	if len(P) == 0:
	# check if the instance is not included already in some other prototype
	PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
	if PrototypeIndex != -1:
	# if so, remove it (recreate prototype--without the instance)
	self._removeInstance(Samples, SampleI, PrototypeIndex)


	self._createCluster(Samples, SampleI)
	Changed[SampleI] = True
	break

	Used.append(P)

	# measure1 = (how many 1 have P and sample[SampleI](Ek) in common) / (beta+how many 1 has P)
	# to have 1's in common = have 1's on the same positions
	Measure1 = float(self._countCommonOnes(P, Samples[SampleI])) / (Parameters.Beta+float(self._countOnes(P)))

	# measure2 = (how many 1 has Ek) / (beta + number of dimensions )
	Measure2 = float(self._countOnes(Samples[SampleI])) / (Parameters.Beta + float(len(Samples[SampleI])))

	# if P and sample[SampleI] are similar enough
	if Measure1 >= Measure2:
	# similarity = (how many 1 have Ek and P in common) / (how many 1's has Ek)
	# to have 1's in common = have 1's on the same positions
	Similarity = 0.0

	if self._countOnes(Samples[SampleI]) > 0:
	Similarity = float(self._countCommonOnes(P, Samples[SampleI])) / float(self._countOnes(Samples[SampleI]))

	# if the similarity is sufficient -- than sample[SampleI] going to be a member of P
	if Similarity >= Parameters.Vigilance:
	# if the instance is already included in some prototype -- find it

	PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
	if PrototypeIndex != -1:
	# test if founded prototype is not actual one in that case go for another Ek
	if self.Prototypes[PrototypeIndex] == P:
	break
	else:
	# re-build prototype - without the sample (Ek)
	self._removeInstance(Sample, SampleI, PrototypeIndex)

	# index of P(current prototype) in prototypes
	PrototypeIndex = self._findItem(self.Prototypes, P, True)

	# add instance to the current prototype
	self._addInstanceBin(Samples[SampleI], PrototypeIndex)
	self.PrototypesSequence[PrototypeIndex].append(SampleI)
	Changed[SampleI] = True
	break
	else:
	# try other best P
	continue
	else:
	# if prototype is not enough similar to instance(sample[SampleI]) then create a new prototype

	# check if the instance is not already in some other prototype
	PrototypeIndex = self._instanceInSequence(self.PrototypesSequence, SampleI)
	if PrototypeIndex != -1:
	# if so, remove it (recreate prototype--without the instance)
	self._removeInstance(Samples, SampleI, PrototypeIndex)

	self._createCluster(Samples, SampleI)
	Changed[SampleI] = True
	break

	# WHILE
	if len(self.Prototypes) != len(Samples):
	break

	# count statistics for this pass
	number_changed=0;
	for ChangedItem in Changed:
	if ChangedItem:
	number_changed += 1

	Fluctuation = (float(number_changed)/len(Samples))*100.0
	Pass += 1
	print("Pass: " + str(Pass) + ", Fluctuation: " + str(Fluctuation) + "%" + ", clusters: " + str(len(self.Prototypes)))

	# test if this iteration has not lower error
	if Fluctuation < FluctuationBest:
	PrototypesBest = self.Prototypes
	PrototypesSequenceBest = self.PrototypesSequence
	FluctuationBest = Fluctuation

	# TODO