@marty1885
Created January 12, 2018 14:25
#include <Athena/Athena.hpp>
#include <Athena/XtensorBackend.hpp>
#include <Athena/NNPACKBackend.hpp>
#include "mnist_reader.hpp"
//Need to use the xtensor API due to the incomplete Tensor implementation
#include <xtensor/xarray.hpp>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <random>
#include <utility>
#include <vector>
using namespace std::chrono;
int maxElementIndex(const std::vector<float>& vec)
{
    return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()));
}
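//e.g. maxElementIndex({0.1f, 0.7f, 0.2f}) == 1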
At::Tensor imagesToTensor(const std::vector<std::vector<uint8_t>>& arr, At::Backend& backend)
{
    std::vector<float> res;
    res.reserve(arr.size()*arr[0].size());
    for(const auto& img : arr)
    {
        for(const auto v : img)
            res.push_back(v/255.f);
    }
    return At::Tensor(res, {(intmax_t)arr.size(), (intmax_t)arr[0].size()}, backend);
}
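//e.g. the 60000 MNIST training images of 28*28 = 784 pixels become a
//{60000, 784} tensor, with each pixel scaled from [0, 255] down to [0, 1].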
std::vector<float> onehot(int ind, int total)
{
    std::vector<float> vec(total);
    for(int i=0;i<total;i++)
        vec[i] = (i==ind)?1.f:0.f;
    return vec;
}
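//e.g. onehot(3, 10) == {0,0,0,1,0,0,0,0,0,0}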
At::Tensor labelsToOnehot(const std::vector<uint8_t>& labels, At::Backend& backend)
{
    std::vector<float> buffer;
    buffer.reserve(labels.size()*10);
    for(const auto& label : labels)
    {
        std::vector<float> onehotVec = onehot(label, 10);
        for(const auto v : onehotVec)
            buffer.push_back(v);
    }
    return At::Tensor(buffer, {(intmax_t)labels.size(), 10}, backend);
}
using namespace At;
//STDP Layer
//This is a fully-connected layer that tries to emulate how real neurons work.
//Each neuron in this layer compares the signal from the upper layer against its
//weights through a Gaussian function. The neuron then sums up all comparison
//results larger than a pre-determined threshold to produce a score. The layer
//selects the top N neurons with the highest scores to fire a signal to the next
//layer. (Basically it outputs a Sparse Data Representation.)
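//Concretely, with the "activate" algorithm registered in main(), neuron j
//scores input x against its weight vector w_j roughly as
//    score_j = (1/n) * sum_i f(x_i - w_ji)
//    f(d) = g(d) if g(d) > 0.35, else -0.1*g(d),  where g(d) = exp(-2*d^2)
//so a perfect match (d = 0) contributes 1, and distant values are penalized.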
class STDP : public FullyConnectedLayer
{
public:
    STDP(intmax_t outputNum) : dist(0, 1)
    {
        setOutputShape(Shape({Shape::None, outputNum}));
        setType("STDP");
    }

    virtual void build() override
    {
        intmax_t outputNum = outputShape()[1];
        intmax_t inputNum = inputShape()[1];
        //Initialize algorithms (registered on the backend in main())
        guessian = backend()->getAlgorithm<SigmoidForward>("guessian");
        updateVector = backend()->getAlgorithm<SigmoidForward>("STDPVector");
        activate = backend()->getAlgorithm<SigmoidForward>("activate");
        //Initialize weights
        for(intmax_t i=0;i<outputNum;i++)
            weights_.push_back(At::rand(0, 1, {inputNum}, *backend()));
        activatedCount_ = std::vector<intmax_t>(outputNum, 0);
    }
    virtual Tensor forward(const Tensor& x) override
    {
        Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend());
        std::vector<std::pair<float, int>> res(outputShape()[1]);
        for(intmax_t i=0;i<x.shape()[0];i++)
        {
            auto vec = x.slice({i});
            float* arr = out.hostPtr()+out.shape()[1]*i;
            #pragma omp parallel for
            for(intmax_t j=0;j<outputShape()[1];j++)
            {
                auto diff = vec-weights_[j];
                auto proximity = activate(diff);
                float prox = proximity.sum({1}).hostPtr()[0]/vec.size();
                //Naive(?) implementation of boosting: scale the score between
                //0.9x (the most active neuron so far) and 1.1x (neurons lagging
                //10 or more activations behind) so no neuron starves.
#if 1
                intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j];
                prox *= (float)std::min<intmax_t>(actiDiff, 10)*0.02f + 0.9f;
#endif
                res[j] = {prox, j};
                arr[j] = 0;
            }
            //Pick the 10 neurons with the highest scores
            std::nth_element(res.begin(), res.begin()+10, res.end(),
                [](auto a, auto b){return a.first > b.first;});
            for(intmax_t j=0;j<10;j++)
            {
                intmax_t winner = res[j].second;
                activatedCount_[winner]++;
                if(activatedCount_[winner] > currentMaxActivateNum_)
                    currentMaxActivateNum_ = activatedCount_[winner];
                arr[winner] = 50;
                //Update the winning neuron's weights (not weights_[j])
                optimizer.update(weights_[winner], updateVector(weights_[winner]-vec));
            }
        }
        return out;
    }
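    //Note: each row of the returned tensor is a sparse code - exactly 10 of
    //the output entries are set to a hand-picked firing magnitude of 50, and
    //the rest stay 0.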
    //Cannot back-propagate through this layer yet
    virtual void backword(const Tensor& x, const Tensor& y,
        Tensor& dx, const Tensor& dy) override
    {
    }

    //Weight updates are done in the forward pass, so this is a no-op.
    virtual void update(Optimizer* optimizer) override
    {
    }
protected:
    std::vector<Tensor> weights_;
    std::vector<intmax_t> activatedCount_;
    intmax_t currentMaxActivateNum_ = 0;
    std::minstd_rand eng;
    std::uniform_real_distribution<float> dist;
    delegate<SigmoidForward> guessian;
    delegate<SigmoidForward> updateVector;
    delegate<SigmoidForward> activate;
    NestrovOptimizer optimizer;
};
int main()
{
    At::XtensorBackend backend;
    //Use the NNPACK backend to accelerate things. Remove if NNPACK is not available.
    At::NNPackBackend nnpBackend;
    backend.useAlgorithm<At::FCForwardFunction>("fullyconnectedForward", nnpBackend);
    backend.useAlgorithm<At::FCBackwardFunction>("fullyconnectedBackward", nnpBackend);
    //A Gaussian similarity kernel: 1 at a perfect match, decaying with distance
    backend.addAlgorithm<SigmoidForward>("guessian",
        [&backend](const Tensor& in)->Tensor
        {
            auto vec = in.host();
            for(auto& v : vec)
                v = std::exp(-2.f*v*v);
            return backend.createTensor(vec, in.shape());
        });
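    //e.g. exp(-2*0^2) = 1, exp(-2*0.5^2) ~= 0.61, exp(-2*1^2) ~= 0.14, so the
    //kernel rewards weights that closely match the incoming signal.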
    //Gradient-like update vector for the STDP rule
    backend.addAlgorithm<SigmoidForward>("STDPVector",
        [&backend](const Tensor& in)->Tensor
        {
            auto vec = in.host();
            for(auto& v : vec)
                v = (v<0)?-0.06f:0.2f;
            return backend.createTensor(vec, in.shape());
        });
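    //Assuming the optimizer applies w -= alpha*grad, this pulls each weight
    //toward the input asymmetrically: a weight above the input (diff > 0) is
    //pushed down by 0.2*alpha, while one below it is pushed up by 0.06*alpha.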
    //Gaussian comparison followed by thresholding
    backend.addAlgorithm<SigmoidForward>("activate",
        [&backend](const Tensor& in)->Tensor
        {
            auto vec = in.host();
            for(auto& v : vec)
            {
                float tmp = std::exp(-2.f*v*v);//Gaussian
                v = tmp > 0.35f ? tmp : -0.1f*tmp;//threshold the comparison
            }
            return backend.createTensor(vec, in.shape());
        });
    At::SequentialNetwork net(&backend);

    auto dataset = mnist::read_dataset<std::vector, std::vector, uint8_t, uint8_t>("../mnist");
    At::Tensor trainingImage = imagesToTensor(dataset.training_images, backend);
    At::Tensor trainingLabels = labelsToOnehot(dataset.training_labels, backend);
    At::Tensor testingImage = imagesToTensor(dataset.test_images, backend);
    At::Tensor testingLabels = labelsToOnehot(dataset.test_labels, backend);

    net.add(At::InputLayer(At::Shape({Shape::None, 28*28})));
    net.add(STDP(30));
    net.add(At::TanhLayer(&backend));
    //A fully connected layer is used to perform the classification
    net.add(At::FullyConnectedLayer(10));
    net.add(At::SigmoidLayer());
    net.compile();
    net.summary();
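    //Since STDP::backword is a no-op, gradients do not pass through the STDP
    //layer; in effect only the FullyConnectedLayer classifier learns from the
    //MSE loss, while the STDP weights adapt unsupervised inside forward().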
    At::SGDOptimizer opt;
    opt.alpha_ = 0.1f;
    At::MSELoss loss;

    size_t epoch = 10;
    size_t batchSize = 16;
    int count = 0;
    auto onBatch = [&](float l)
    {
        int sampleNum = trainingImage.shape()[0];
        std::cout << count << "/" << sampleNum << "\r" << std::flush;
        count += batchSize;
    };
    auto onEpoch = [&](float l)
    {
        std::cout << "Epoch Loss: " << l << std::endl;
        count = 0;
    };
    high_resolution_clock::time_point t1 = high_resolution_clock::now();
    net.fit(opt, loss, trainingImage, trainingLabels, batchSize, epoch, onBatch, onEpoch);
    high_resolution_clock::time_point t2 = high_resolution_clock::now();
    duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
    std::cout << "It took me " << time_span.count() << " seconds." << std::endl;
    int correct = 0;
    for(intmax_t i=0;i<testingImage.shape()[0];i++)
    {
        At::Tensor x = testingImage.slice({i}, {1});
        At::Tensor res = net.predict(x);
        int predictLabel = maxElementIndex(res.host());
        if(predictLabel == dataset.test_labels[i])
            correct++;
    }
    std::cout << "Accuracy: " << correct/(float)testingImage.shape()[0] << std::endl;
}