c58/fastpnn.pyx

## fastpnn.pyx
# encoding: utf-8
# cython: profile=False
# cython: wraparound=False
# cython: boundscheck=False
# filename: fastpnn.pyx

from libc.stdlib cimport malloc, realloc, free, rand
cimport cython
from cython.parallel import parallel, prange
from cython.view cimport array as cvarray
cimport libc.math as cmath
import math
import numpy as np
cimport numpy as np
from fastgen cimport BasicGeneticFunction, Population, Chromosome, GeneticAlgorithm, _new_population, _new_chromosome
from scipy.spatial import distance

######################################
#
#   NETWORK
#
cdef class ProbabilityNetwork(object):
    """Probabilistic neural network backed by malloc'ed per-class sample arrays.

    Every stored training sample acts as one pattern unit.  ``compute`` sums
    ``exp(-sum(((x - sample) / sigma) ** 2))`` over the samples of each class
    and normalises the per-class sums so that they add up to one.
    """
    cdef:
        public int input_count      # number of features read from each vector
        public int output_count     # number of class slots (output neurons)
        double _sigma               # kernel width used by classify()
        dict _cls_to_idx            # class label -> internal class index
        dict _idx_to_cls            # internal class index -> class label
        double*** _samples          # _samples[class][sample] -> double vector
        int* _samples_count         # number of stored samples per class
        int* _samples_alloc         # number of allocated sample slots per class

    property sigma:
        # Kernel width; the getter wraps the C double in a 0-d NumPy array.
        def __get__(self):
            return np.array(<double>self._sigma)

        def __set__(self, new_sigma):
            self._sigma = new_sigma

    def __init__(self, int inputs_count, int output_count, double sigma=0.001):
        """Allocate empty sample storage for ``output_count`` classes.

        Raises ValueError for negative counts and MemoryError when an
        allocation fails.
        """
        cdef int i, n_slots

        if inputs_count < 0 or output_count < 0:
            raise ValueError("inputs_count and output_count must be non-negative.")

        # A repeated __init__ call must not leak the previous buffers
        if self._samples != NULL:
            self._destroy_samples(deep=True)

        # Init basic variables; output_count stays 0 until the arrays exist so
        # that __dealloc__ never walks unallocated memory
        self.input_count = inputs_count
        self.output_count = 0
        self._cls_to_idx = {}
        self._idx_to_cls = {}
        self._sigma = sigma

        # malloc(0) may return NULL, so always request at least one slot
        n_slots = output_count
        if n_slots < 1:
            n_slots = 1

        # Set samples array for each class
        self._samples = <double ***> malloc(sizeof(double**) * n_slots)
        self._samples_count = <int *> malloc(sizeof(int) * n_slots)
        self._samples_alloc = <int *> malloc(sizeof(int) * n_slots)
        if self._samples == NULL or self._samples_count == NULL or self._samples_alloc == NULL:
            free(self._samples)
            free(self._samples_count)
            free(self._samples_alloc)
            self._samples = NULL
            self._samples_count = NULL
            self._samples_alloc = NULL
            raise MemoryError()

        # Put every row into a consistent empty state before allocating it
        for i in range(output_count):
            self._samples_count[i] = 0
            self._samples_alloc[i] = 0
            self._samples[i] = NULL
        self.output_count = output_count

        for i in range(output_count):
            self._samples[i] = <double **> malloc(sizeof(double*) * 1000)
            if self._samples[i] == NULL:
                raise MemoryError()
            self._samples_alloc[i] = 1000

    cdef double* compute(self, double[:] inp, double sigma) except NULL:
        """Return a malloc'ed array of ``output_count`` normalised class scores.

        The caller owns the returned buffer and must ``free`` it.  Raises
        ValueError when ``inp`` holds fewer than ``input_count`` values and
        MemoryError when the result buffer cannot be allocated.
        """
        # Init of computation
        cdef:
            double* out
            double dist, psum
            int i, j, k, n_slots

        # Bounds checking is disabled for this module, so validate explicitly
        if inp.shape[0] < self.input_count:
            raise ValueError("Input vector is shorter than input_count.")

        # At least one slot / one thread even when there are no classes
        n_slots = self.output_count
        if n_slots < 1:
            n_slots = 1

        out = <double *> malloc(sizeof(double) * n_slots)
        if out == NULL:
            raise MemoryError()

        # Pattern layer
        for i in prange(self.output_count, num_threads=n_slots, nogil=True):
            out[i] = 0

            for j in range(self._samples_count[i]):
                dist = 0.0

                for k in range(self.input_count):
                    dist += (((inp[k] - self._samples[i][j][k])) / sigma) ** 2

                # Floor every kernel value so a class with samples never sums to zero
                dist = cmath.exp(-dist)
                if dist < 1.e-40:
                    dist = 1.e-40

                out[i] += dist

        # Summation layer
        psum = 0.0
        for i in range(self.output_count):
            psum += out[i]
        if psum < 1.e-40:
            psum = 1.e-40

        # Output layer
        for i in range(self.output_count):
            out[i] /= psum

        return out

    cpdef int classify(self, double[:] inp, double threshold=-1) except? -1:
        """Classify ``inp`` with the network's own sigma; see ``_classify``."""
        # Read the C attribute directly instead of going through the property,
        # which would build a NumPy array just to convert it back to a double
        return self._classify(inp, threshold, self._sigma)

    cdef int _classify(self, double[:] inp, double threshold=-1, double sigma=0.1) except? -1:
        """Return the label of the highest-scoring class for ``inp``.

        Only classes whose score is at least ``threshold`` are considered.
        Returns -1 when no class qualifies or the winning slot has no
        registered label (the return type is a C int, so ``None`` cannot be
        used as the "no class" marker).
        """
        cdef:
            double* res = self.compute(inp, sigma)
            int idx = -1, i
            double val = -1

        for i in range(self.output_count):
            if res[i] > val and res[i] >= threshold:
                val = res[i]
                idx = i

        free(res)
        if idx < 0 or idx not in self._idx_to_cls:
            return -1
        return self.idx_to_class(idx)

    cpdef add_sample(self, double[:] s):
        """Store one training sample.

        ``s`` holds ``input_count`` feature values followed by the class label
        at index ``input_count``.  Raises ValueError when ``s`` is too short,
        Exception when the label needs more class slots than ``output_count``
        and MemoryError when an allocation fails.
        """
        cdef:
            double** extended_samples
            double* sample
            int j, curr, label, idx, new_alloc

        # Bounds checking is disabled for this module, so validate explicitly
        if s.shape[0] <= self.input_count:
            raise ValueError("Sample must hold input_count values followed by the class label.")

        # Get class and target index
        label = <int>s[self.input_count]
        idx = self.class_to_idx(label)

        # Check count of classes and update class count cache
        if idx >= self.output_count:
            raise Exception("Training data contains more classes than neural network has output neurons to hold.")

        # Realloc samples array if out of allocated
        if self._samples_count[idx] + 1 >= self._samples_alloc[idx]:
            new_alloc = self._samples_alloc[idx] + 1000
            extended_samples = <double **> realloc(self._samples[idx], sizeof(double*) * new_alloc)
            if not extended_samples:
                raise MemoryError()

            # Record the new capacity only once the buffer really has it
            self._samples[idx] = extended_samples
            self._samples_alloc[idx] = new_alloc

        # Create new sample
        curr = self._samples_count[idx]
        sample = <double *> malloc(sizeof(double) * (self.input_count + 1))
        if sample == NULL:
            raise MemoryError()
        for j in range(self.input_count):
            sample[j] = s[j]
        # The trailing slot stores the internal class index, not the label
        sample[self.input_count] = idx
        self._samples[idx][curr] = sample
        self._samples_count[idx] += 1

    cpdef set_samples(self, double[:,:] samples):
        """Replace all stored samples and class mappings with the rows of ``samples``."""
        cdef int i

        # Create empty values
        self._cls_to_idx = {}
        self._idx_to_cls = {}

        # Add all samples
        self._destroy_samples()
        for i in range(samples.shape[0]):
            self.add_sample(samples[i])

    cpdef int class_to_idx(self, int cls):
        """Return the internal index of label ``cls``, registering it on first use."""
        if cls not in self._cls_to_idx:
            self._cls_to_idx[cls] = len(self._cls_to_idx)
            self._idx_to_cls[self._cls_to_idx[cls]] = cls
        return self._cls_to_idx[cls]

    cpdef int idx_to_class(self, int idx):
        """Return the label registered for internal index ``idx``."""
        return self._idx_to_cls[idx]

    cdef _destroy_samples(self, bint deep=False):
        """Free all stored samples; with ``deep`` also free the bookkeeping arrays."""
        cdef int i, j
        if self._samples != NULL:
            for i in range(self.output_count):
                for j in range(self._samples_count[i]):
                    free(self._samples[i][j])
                if deep:
                    free(self._samples[i])
                else:
                    # The row buffer is kept; a zero capacity makes the next
                    # add_sample() realloc it back to the initial size
                    self._samples_count[i] = 0
                    self._samples_alloc[i] = 0
        if deep:
            free(self._samples)
            free(self._samples_count)
            free(self._samples_alloc)
            # Leave no dangling pointers behind
            self._samples = NULL
            self._samples_count = NULL
            self._samples_alloc = NULL

    def __dealloc__(ProbabilityNetwork self):
        self._destroy_samples(deep=True)


######################################
#
#   LEARNING
#
cdef class GeneticLearning(object):
    """Tune a ProbabilityNetwork's sigma with a genetic algorithm.

    Extra constructor arguments are forwarded unchanged to
    ``GeneticLearningFunction``.
    """
    cdef:
        public GeneticAlgorithm ga
        ProbabilityNetwork network

    def __init__(self, ProbabilityNetwork network, *args, **kwargs):
        self.network = network
        self.ga = GeneticAlgorithm(GeneticLearningFunction(network, *args, **kwargs))

    cpdef train(self):
        """Run the genetic algorithm, store the resulting sigma on the network and print it."""
        self.ga.run()
        # stat() is unpacked into four values; only the last one is used here
        # NOTE(review): presumably it is the best chromosome's gene - confirm in fastgen
        avg_age, avg_score, max_score, sigma = self.ga.stat()
        self.network.sigma = sigma
        # Call form prints the same text under language_level 2 and 3
        print(str(sigma))


cdef class GeneticLearningFunction(BasicGeneticFunction):
    """Genetic-algorithm callbacks that search for a kernel width (sigma).

    Each chromosome's first gene is passed as ``sigma`` when the network
    classifies the test set; the resulting score is the chromosome's fitness.
    """
    cdef:
        ProbabilityNetwork network
        double[:,:] test_set        # rows scored by fitness()
        double[:,:] training_set    # rows loaded into the network as samples
        int activ_train             # training rows whose label is non-zero
        int activ_test              # test rows whose label is non-zero

    def __init__(self, ProbabilityNetwork network, double[:,:] training_set, double[:,:] test_set):
        """Load ``training_set`` into ``network`` and count the non-zero labels.

        Every row holds ``network.input_count`` values followed by the class
        label.  Raises ValueError when a data set's rows are too short.
        """
        cdef int i

        # Bounds checking is disabled for this module, so validate explicitly
        if training_set.shape[1] <= network.input_count or test_set.shape[1] <= network.input_count:
            raise ValueError("Data set rows must hold input_count values followed by the class label.")

        BasicGeneticFunction.__init__(self, pop_size=20, chromo_size=1, eq_distance=4,
                 banking_age=25, banking_score=0.2)
        self.network = network
        self.test_set = test_set
        self.training_set = training_set
        self.network.set_samples(training_set)
        self.activ_train = 0
        self.activ_test = 0

        for i in range(len(training_set)):
            if training_set[i, network.input_count] != 0.0:
                self.activ_train += 1

        # Count labels of the test set itself, not of the training set
        for i in range(len(test_set)):
            if test_set[i, network.input_count] != 0.0:
                self.activ_test += 1

    cdef bint maybe_stop(self, Population* pop):
        # Stop once more than 20 iterations have been counted
        return self.iter_count > 20

    cdef Population* initial(self):
        return self._initial_random()

    # Basic genetic functions
    cdef Population* withhelds(self, Population* pop):
        return self._withhelds_best(pop, 3)

    cdef Population* mutations(self, Population* pop):
        return self._mutations_random(pop, 5)

    cdef Population* parents(self, Population* pop):
        return self._parents_random(pop, 10)

    cdef Chromosome* crossover(self, Chromosome* parent1, Chromosome* parent2):
        # Only gene 0 is combined (chromo_size is 1): each parent's gene is
        # scaled by its own random factor from [-1.5, 1.5) and the two are summed
        cdef Chromosome* child = _new_chromosome(parent1.size)
        child.genotype[0] = (parent1.genotype[0] * np.random.uniform(-1.5, 1.5)) + (parent2.genotype[0] * np.random.uniform(-1.5, 1.5))
        return child

    cdef Chromosome* mutate(self, Chromosome* ch):
        return self._mutate_by_element(ch)

    cdef double fitness(self, Chromosome* chromo):
        # Only the test set contributes to the score
        cdef double score = 0
        #score += self._fitness_on_set(chromo, self.training_set, self.activ_train)
        score += self._fitness_on_set(chromo, self.test_set, self.activ_test)
        return score

    # Implementation details
    cdef double _fitness_on_set(self, Chromosome* chromo, double[:,:] data_set, int active_set):
        """Score gene 0 of ``chromo`` as sigma on ``data_set``.

        Rows classified as non-zero count as "active".  The score is the share
        of active rows whose predicted label matches, multiplied by a factor
        that shrinks as the active count departs from ``active_set``.  Returns
        0 when nothing is classified as active or ``active_set`` is 0.
        """
        cdef double err = 0.0, active = 0.0
        cdef int i, res
        for i in range(len(data_set)):
            res = self.network._classify(data_set[i], -1, chromo.genotype[0])
            if res != 0:
                active += 1
                if res != data_set[i, self.network.input_count]:
                    err += 1
        # Both values are used as divisors below
        if active == 0 or active_set == 0:
            return 0
        return (1.0 - (err / active)) * (1.0 - abs(active_set - active) / active_set)

    cdef double _mutate_by_element_func(self, double value, Chromosome* orig_ch):
        # The mutation range shrinks quickly as the chromosome's score grows
        cdef double val = 1.0/((orig_ch.score + 1.0) ** 12)
        return value + np.random.uniform(-val, val)

    cdef double random_chromo_value(self):
        # Uniform draw whose upper bound is itself drawn from [0, 10)
        return np.random.uniform(0, np.random.uniform(0, 10))

    cdef double _distance(self, Chromosome* first, Chromosome* second):
        # Euclidean distance between the two genotypes
        cdef double sumSq = 0.0
        cdef int i

        for i in range(first.size):
            sumSq += (first.genotype[i] - second.genotype[i]) ** 2

        return sumSq ** 0.5
	# encoding: utf-8
	# cython: profile=False
	# cython: wraparound=False
	# cython: boundscheck=False
	# filename: fastpnn.pyx

	from libc.stdlib cimport malloc, realloc, free, rand
	cimport cython
	from cython.parallel import parallel, prange
	from cython.view cimport array as cvarray
	cimport libc.math as cmath
	import math
	import numpy as np
	cimport numpy as np
	from fastgen cimport BasicGeneticFunction, Population, Chromosome, GeneticAlgorithm, _new_population, _new_chromosome
	from scipy.spatial import distance

	######################################
	#
	# NETWORK
	#
	cdef class ProbabilityNetwork(object):
	    """Probabilistic neural network backed by malloc'ed per-class sample arrays.

	    Every stored training sample acts as one pattern unit.  ``compute`` sums
	    ``exp(-sum(((x - sample) / sigma) ** 2))`` over the samples of each class
	    and normalises the per-class sums so that they add up to one.
	    """
	    cdef:
	        public int input_count      # number of features read from each vector
	        public int output_count     # number of class slots (output neurons)
	        double _sigma               # kernel width used by classify()
	        dict _cls_to_idx            # class label -> internal class index
	        dict _idx_to_cls            # internal class index -> class label
	        double*** _samples          # _samples[class][sample] -> double vector
	        int* _samples_count         # number of stored samples per class
	        int* _samples_alloc         # number of allocated sample slots per class

	    property sigma:
	        # Kernel width; the getter wraps the C double in a 0-d NumPy array.
	        def __get__(self):
	            return np.array(<double>self._sigma)

	        def __set__(self, new_sigma):
	            self._sigma = new_sigma

	    def __init__(self, int inputs_count, int output_count, double sigma=0.001):
	        """Allocate empty sample storage for ``output_count`` classes.

	        Raises ValueError for negative counts and MemoryError when an
	        allocation fails.
	        """
	        cdef int i, n_slots

	        if inputs_count < 0 or output_count < 0:
	            raise ValueError("inputs_count and output_count must be non-negative.")

	        # A repeated __init__ call must not leak the previous buffers
	        if self._samples != NULL:
	            self._destroy_samples(deep=True)

	        # Init basic variables; output_count stays 0 until the arrays exist so
	        # that __dealloc__ never walks unallocated memory
	        self.input_count = inputs_count
	        self.output_count = 0
	        self._cls_to_idx = {}
	        self._idx_to_cls = {}
	        self._sigma = sigma

	        # malloc(0) may return NULL, so always request at least one slot
	        n_slots = output_count
	        if n_slots < 1:
	            n_slots = 1

	        # Set samples array for each class
	        self._samples = <double ***> malloc(sizeof(double**) * n_slots)
	        self._samples_count = <int *> malloc(sizeof(int) * n_slots)
	        self._samples_alloc = <int *> malloc(sizeof(int) * n_slots)
	        if self._samples == NULL or self._samples_count == NULL or self._samples_alloc == NULL:
	            free(self._samples)
	            free(self._samples_count)
	            free(self._samples_alloc)
	            self._samples = NULL
	            self._samples_count = NULL
	            self._samples_alloc = NULL
	            raise MemoryError()

	        # Put every row into a consistent empty state before allocating it
	        for i in range(output_count):
	            self._samples_count[i] = 0
	            self._samples_alloc[i] = 0
	            self._samples[i] = NULL
	        self.output_count = output_count

	        for i in range(output_count):
	            self._samples[i] = <double **> malloc(sizeof(double*) * 1000)
	            if self._samples[i] == NULL:
	                raise MemoryError()
	            self._samples_alloc[i] = 1000

	    cdef double* compute(self, double[:] inp, double sigma) except NULL:
	        """Return a malloc'ed array of ``output_count`` normalised class scores.

	        The caller owns the returned buffer and must ``free`` it.  Raises
	        ValueError when ``inp`` holds fewer than ``input_count`` values and
	        MemoryError when the result buffer cannot be allocated.
	        """
	        # Init of computation
	        cdef:
	            double* out
	            double dist, psum
	            int i, j, k, n_slots

	        # Bounds checking is disabled for this module, so validate explicitly
	        if inp.shape[0] < self.input_count:
	            raise ValueError("Input vector is shorter than input_count.")

	        # At least one slot / one thread even when there are no classes
	        n_slots = self.output_count
	        if n_slots < 1:
	            n_slots = 1

	        out = <double *> malloc(sizeof(double) * n_slots)
	        if out == NULL:
	            raise MemoryError()

	        # Pattern layer
	        for i in prange(self.output_count, num_threads=n_slots, nogil=True):
	            out[i] = 0

	            for j in range(self._samples_count[i]):
	                dist = 0.0

	                for k in range(self.input_count):
	                    dist += (((inp[k] - self._samples[i][j][k])) / sigma) ** 2

	                # Floor every kernel value so a class with samples never sums to zero
	                dist = cmath.exp(-dist)
	                if dist < 1.e-40:
	                    dist = 1.e-40

	                out[i] += dist

	        # Summation layer
	        psum = 0.0
	        for i in range(self.output_count):
	            psum += out[i]
	        if psum < 1.e-40:
	            psum = 1.e-40

	        # Output layer
	        for i in range(self.output_count):
	            out[i] /= psum

	        return out

	    cpdef int classify(self, double[:] inp, double threshold=-1) except? -1:
	        """Classify ``inp`` with the network's own sigma; see ``_classify``."""
	        # Read the C attribute directly instead of going through the property,
	        # which would build a NumPy array just to convert it back to a double
	        return self._classify(inp, threshold, self._sigma)

	    cdef int _classify(self, double[:] inp, double threshold=-1, double sigma=0.1) except? -1:
	        """Return the label of the highest-scoring class for ``inp``.

	        Only classes whose score is at least ``threshold`` are considered.
	        Returns -1 when no class qualifies or the winning slot has no
	        registered label (the return type is a C int, so ``None`` cannot be
	        used as the "no class" marker).
	        """
	        cdef:
	            double* res = self.compute(inp, sigma)
	            int idx = -1, i
	            double val = -1

	        for i in range(self.output_count):
	            if res[i] > val and res[i] >= threshold:
	                val = res[i]
	                idx = i

	        free(res)
	        if idx < 0 or idx not in self._idx_to_cls:
	            return -1
	        return self.idx_to_class(idx)

	    cpdef add_sample(self, double[:] s):
	        """Store one training sample.

	        ``s`` holds ``input_count`` feature values followed by the class label
	        at index ``input_count``.  Raises ValueError when ``s`` is too short,
	        Exception when the label needs more class slots than ``output_count``
	        and MemoryError when an allocation fails.
	        """
	        cdef:
	            double** extended_samples
	            double* sample
	            int j, curr, label, idx, new_alloc

	        # Bounds checking is disabled for this module, so validate explicitly
	        if s.shape[0] <= self.input_count:
	            raise ValueError("Sample must hold input_count values followed by the class label.")

	        # Get class and target index
	        label = <int>s[self.input_count]
	        idx = self.class_to_idx(label)

	        # Check count of classes and update class count cache
	        if idx >= self.output_count:
	            raise Exception("Training data contains more classes than neural network has output neurons to hold.")

	        # Realloc samples array if out of allocated
	        if self._samples_count[idx] + 1 >= self._samples_alloc[idx]:
	            new_alloc = self._samples_alloc[idx] + 1000
	            extended_samples = <double **> realloc(self._samples[idx], sizeof(double*) * new_alloc)
	            if not extended_samples:
	                raise MemoryError()

	            # Record the new capacity only once the buffer really has it
	            self._samples[idx] = extended_samples
	            self._samples_alloc[idx] = new_alloc

	        # Create new sample
	        curr = self._samples_count[idx]
	        sample = <double *> malloc(sizeof(double) * (self.input_count + 1))
	        if sample == NULL:
	            raise MemoryError()
	        for j in range(self.input_count):
	            sample[j] = s[j]
	        # The trailing slot stores the internal class index, not the label
	        sample[self.input_count] = idx
	        self._samples[idx][curr] = sample
	        self._samples_count[idx] += 1

	    cpdef set_samples(self, double[:,:] samples):
	        """Replace all stored samples and class mappings with the rows of ``samples``."""
	        cdef int i

	        # Create empty values
	        self._cls_to_idx = {}
	        self._idx_to_cls = {}

	        # Add all samples
	        self._destroy_samples()
	        for i in range(samples.shape[0]):
	            self.add_sample(samples[i])

	    cpdef int class_to_idx(self, int cls):
	        """Return the internal index of label ``cls``, registering it on first use."""
	        if cls not in self._cls_to_idx:
	            self._cls_to_idx[cls] = len(self._cls_to_idx)
	            self._idx_to_cls[self._cls_to_idx[cls]] = cls
	        return self._cls_to_idx[cls]

	    cpdef int idx_to_class(self, int idx):
	        """Return the label registered for internal index ``idx``."""
	        return self._idx_to_cls[idx]

	    cdef _destroy_samples(self, bint deep=False):
	        """Free all stored samples; with ``deep`` also free the bookkeeping arrays."""
	        cdef int i, j
	        if self._samples != NULL:
	            for i in range(self.output_count):
	                for j in range(self._samples_count[i]):
	                    free(self._samples[i][j])
	                if deep:
	                    free(self._samples[i])
	                else:
	                    # The row buffer is kept; a zero capacity makes the next
	                    # add_sample() realloc it back to the initial size
	                    self._samples_count[i] = 0
	                    self._samples_alloc[i] = 0
	        if deep:
	            free(self._samples)
	            free(self._samples_count)
	            free(self._samples_alloc)
	            # Leave no dangling pointers behind
	            self._samples = NULL
	            self._samples_count = NULL
	            self._samples_alloc = NULL

	    def __dealloc__(ProbabilityNetwork self):
	        self._destroy_samples(deep=True)


	######################################
	#
	# LEARNING
	#
	cdef class GeneticLearning(object):
	    """Tune a ProbabilityNetwork's sigma with a genetic algorithm.

	    Extra constructor arguments are forwarded unchanged to
	    ``GeneticLearningFunction``.
	    """
	    cdef:
	        public GeneticAlgorithm ga
	        ProbabilityNetwork network

	    def __init__(self, ProbabilityNetwork network, *args, **kwargs):
	        self.network = network
	        self.ga = GeneticAlgorithm(GeneticLearningFunction(network, *args, **kwargs))

	    cpdef train(self):
	        """Run the genetic algorithm, store the resulting sigma on the network and print it."""
	        self.ga.run()
	        # stat() is unpacked into four values; only the last one is used here
	        # NOTE(review): presumably it is the best chromosome's gene - confirm in fastgen
	        avg_age, avg_score, max_score, sigma = self.ga.stat()
	        self.network.sigma = sigma
	        # Call form prints the same text under language_level 2 and 3
	        print(str(sigma))


	cdef class GeneticLearningFunction(BasicGeneticFunction):
	    """Genetic-algorithm callbacks that search for a kernel width (sigma).

	    Each chromosome's first gene is passed as ``sigma`` when the network
	    classifies the test set; the resulting score is the chromosome's fitness.
	    """
	    cdef:
	        ProbabilityNetwork network
	        double[:,:] test_set        # rows scored by fitness()
	        double[:,:] training_set    # rows loaded into the network as samples
	        int activ_train             # training rows whose label is non-zero
	        int activ_test              # test rows whose label is non-zero

	    def __init__(self, ProbabilityNetwork network, double[:,:] training_set, double[:,:] test_set):
	        """Load ``training_set`` into ``network`` and count the non-zero labels.

	        Every row holds ``network.input_count`` values followed by the class
	        label.  Raises ValueError when a data set's rows are too short.
	        """
	        cdef int i

	        # Bounds checking is disabled for this module, so validate explicitly
	        if training_set.shape[1] <= network.input_count or test_set.shape[1] <= network.input_count:
	            raise ValueError("Data set rows must hold input_count values followed by the class label.")

	        BasicGeneticFunction.__init__(self, pop_size=20, chromo_size=1, eq_distance=4,
	                 banking_age=25, banking_score=0.2)
	        self.network = network
	        self.test_set = test_set
	        self.training_set = training_set
	        self.network.set_samples(training_set)
	        self.activ_train = 0
	        self.activ_test = 0

	        for i in range(len(training_set)):
	            if training_set[i, network.input_count] != 0.0:
	                self.activ_train += 1

	        # Count labels of the test set itself, not of the training set
	        for i in range(len(test_set)):
	            if test_set[i, network.input_count] != 0.0:
	                self.activ_test += 1

	    cdef bint maybe_stop(self, Population* pop):
	        # Stop once more than 20 iterations have been counted
	        return self.iter_count > 20

	    cdef Population* initial(self):
	        return self._initial_random()

	    # Basic genetic functions
	    cdef Population* withhelds(self, Population* pop):
	        return self._withhelds_best(pop, 3)

	    cdef Population* mutations(self, Population* pop):
	        return self._mutations_random(pop, 5)

	    cdef Population* parents(self, Population* pop):
	        return self._parents_random(pop, 10)

	    cdef Chromosome* crossover(self, Chromosome* parent1, Chromosome* parent2):
	        # Only gene 0 is combined (chromo_size is 1): each parent's gene is
	        # scaled by its own random factor from [-1.5, 1.5) and the two are summed
	        cdef Chromosome* child = _new_chromosome(parent1.size)
	        child.genotype[0] = (parent1.genotype[0] * np.random.uniform(-1.5, 1.5)) + (parent2.genotype[0] * np.random.uniform(-1.5, 1.5))
	        return child

	    cdef Chromosome* mutate(self, Chromosome* ch):
	        return self._mutate_by_element(ch)

	    cdef double fitness(self, Chromosome* chromo):
	        # Only the test set contributes to the score
	        cdef double score = 0
	        #score += self._fitness_on_set(chromo, self.training_set, self.activ_train)
	        score += self._fitness_on_set(chromo, self.test_set, self.activ_test)
	        return score

	    # Implementation details
	    cdef double _fitness_on_set(self, Chromosome* chromo, double[:,:] data_set, int active_set):
	        """Score gene 0 of ``chromo`` as sigma on ``data_set``.

	        Rows classified as non-zero count as "active".  The score is the share
	        of active rows whose predicted label matches, multiplied by a factor
	        that shrinks as the active count departs from ``active_set``.  Returns
	        0 when nothing is classified as active or ``active_set`` is 0.
	        """
	        cdef double err = 0.0, active = 0.0
	        cdef int i, res
	        for i in range(len(data_set)):
	            res = self.network._classify(data_set[i], -1, chromo.genotype[0])
	            if res != 0:
	                active += 1
	                if res != data_set[i, self.network.input_count]:
	                    err += 1
	        # Both values are used as divisors below
	        if active == 0 or active_set == 0:
	            return 0
	        return (1.0 - (err / active)) * (1.0 - abs(active_set - active) / active_set)

	    cdef double _mutate_by_element_func(self, double value, Chromosome* orig_ch):
	        # The mutation range shrinks quickly as the chromosome's score grows
	        cdef double val = 1.0/((orig_ch.score + 1.0) ** 12)
	        return value + np.random.uniform(-val, val)

	    cdef double random_chromo_value(self):
	        # Uniform draw whose upper bound is itself drawn from [0, 10)
	        return np.random.uniform(0, np.random.uniform(0, 10))

	    cdef double _distance(self, Chromosome* first, Chromosome* second):
	        # Euclidean distance between the two genotypes
	        cdef double sumSq = 0.0
	        cdef int i

	        for i in range(first.size):
	            sumSq += (first.genotype[i] - second.genotype[i]) ** 2

	        return sumSq ** 0.5