@marty1885
Created January 12, 2018 14:25
#include <Athena/Athena.hpp>
#include <Athena/XtensorBackend.hpp>
#include <Athena/NNPACKBackend.hpp>
#include "mnist_reader.hpp"
//Need to use the xtensor API due to the incomplete Tensor implementation
#include <xtensor/xarray.hpp>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <random>
#include <utility>
#include <vector>
using namespace std::chrono;
int maxElementIndex(const std::vector<float>& vec)
{
    return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()));
}
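//e.g. maxElementIndex({0.1f, 0.7f, 0.2f}) == 1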
At::Tensor imagesToTensor(const std::vector<std::vector<uint8_t>>& arr, At::Backend& backend)
{
    std::vector<float> res;
    res.reserve(arr.size()*arr[0].size());
    for(const auto& img : arr)
    {
        for(const auto v : img)
            res.push_back(v/255.f);
    }
    return At::Tensor(res, {(intmax_t)arr.size(), (intmax_t)arr[0].size()}, backend);
}
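//e.g. the 60000 MNIST training images of 28*28 = 784 pixels become a
//{60000, 784} tensor, with each pixel scaled from [0, 255] down to [0, 1].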
std::vector<float> onehot(int ind, int total)
{
    std::vector<float> vec(total);
    for(int i=0;i<total;i++)
        vec[i] = (i==ind)?1.f:0.f;
    return vec;
}
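//e.g. onehot(3, 10) == {0,0,0,1,0,0,0,0,0,0}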
At::Tensor labelsToOnehot(const std::vector<uint8_t>& labels, At::Backend& backend)
{
    std::vector<float> buffer;
    buffer.reserve(labels.size()*10);
    for(const auto& label : labels)
    {
        std::vector<float> onehotVec = onehot(label, 10);
        for(const auto v : onehotVec)
            buffer.push_back(v);
    }
    return At::Tensor(buffer, {(intmax_t)labels.size(), 10}, backend);
}
using namespace At;
//STDP Layer
//This is a fully-connected layer that tries to emulate how real neurons work.
//Each neuron in this layer compares the signal from the upper layer against its
//weights through a Gaussian function. The neuron then sums up all comparison
//results larger than a pre-determined threshold to produce a score. The layer
//selects the top N neurons with the highest scores to fire a signal to the next
//layer. (Basically it outputs a Sparse Data Representation.)
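//Concretely, with the "activate" algorithm registered in main(), neuron j
//scores input x against its weight vector w_j roughly as
//    score_j = (1/n) * sum_i f(x_i - w_ji)
//    f(d) = g(d) if g(d) > 0.35, else -0.1*g(d),  where g(d) = exp(-2*d^2)
//so a perfect match (d = 0) contributes 1, and distant values are penalized.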
class STDP : public FullyConnectedLayer
{
public:
    STDP(intmax_t outputNum) : dist(0, 1)
    {
        setOutputShape(Shape({Shape::None, outputNum}));
        setType("STDP");
    }

    virtual void build() override
    {
        intmax_t outputNum = outputShape()[1];
        intmax_t inputNum = inputShape()[1];
        //Initialize algorithms (registered on the backend in main())
        guessian = backend()->getAlgorithm<SigmoidForward>("guessian");
        updateVector = backend()->getAlgorithm<SigmoidForward>("STDPVector");
        activate = backend()->getAlgorithm<SigmoidForward>("activate");
        //Initialize weights
        for(intmax_t i=0;i<outputNum;i++)
            weights_.push_back(At::rand(0, 1, {inputNum}, *backend()));
        activatedCount_ = std::vector<intmax_t>(outputNum, 0);
    }
    virtual Tensor forward(const Tensor& x) override
    {
        Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend());
        std::vector<std::pair<float, int>> res(outputShape()[1]);
        for(intmax_t i=0;i<x.shape()[0];i++)
        {
            auto vec = x.slice({i});
            float* arr = out.hostPtr()+out.shape()[1]*i;
            #pragma omp parallel for
            for(intmax_t j=0;j<outputShape()[1];j++)
            {
                auto diff = vec-weights_[j];
                auto proximity = activate(diff);
                float prox = proximity.sum({1}).hostPtr()[0]/vec.size();
                //Naive(?) implementation of boosting: scale the score between
                //0.9x (the most active neuron so far) and 1.1x (neurons lagging
                //10 or more activations behind) so no neuron starves.
#if 1
                intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j];
                prox *= (float)std::min<intmax_t>(actiDiff, 10)*0.02f + 0.9f;
#endif
                res[j] = {prox, j};
                arr[j] = 0;
            }
            //Pick the 10 neurons with the highest scores
            std::nth_element(res.begin(), res.begin()+10, res.end(),
                [](auto a, auto b){return a.first > b.first;});
            for(intmax_t j=0;j<10;j++)
            {
                intmax_t winner = res[j].second;
                activatedCount_[winner]++;
                if(activatedCount_[winner] > currentMaxActivateNum_)
                    currentMaxActivateNum_ = activatedCount_[winner];
                arr[winner] = 50;
                //Update the winning neuron's weights (not weights_[j])
                optimizer.update(weights_[winner], updateVector(weights_[winner]-vec));
            }
        }
        return out;
    }
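    //Note: each row of the returned tensor is a sparse code - exactly 10 of
    //the output entries are set to a hand-picked firing magnitude of 50, and
    //the rest stay 0.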
    //Cannot back-propagate through this layer yet
    virtual void backword(const Tensor& x, const Tensor& y,
        Tensor& dx, const Tensor& dy) override
    {
    }

    //Weight updates are done in the forward pass, so this is a no-op.
    virtual void update(Optimizer* optimizer) override
    {
    }
protected:
    std::vector<Tensor> weights_;
    std::vector<intmax_t> activatedCount_;
    intmax_t currentMaxActivateNum_ = 0;
    std::minstd_rand eng;
    std::uniform_real_distribution<float> dist;
    delegate<SigmoidForward> guessian;
    delegate<SigmoidForward> updateVector;
    delegate<SigmoidForward> activate;
    NestrovOptimizer optimizer;
};
int main()
{
    At::XtensorBackend backend;
    //Use the NNPACK backend to accelerate things. Remove if NNPACK is not available.
    At::NNPackBackend nnpBackend;
    backend.useAlgorithm<At::FCForwardFunction>("fullyconnectedForward", nnpBackend);
    backend.useAlgorithm<At::FCBackwardFunction>("fullyconnectedBackward", nnpBackend);
    //A Gaussian similarity kernel: 1 at a perfect match, decaying with distance
    backend.addAlgorithm<SigmoidForward>("guessian",
        [&backend](const Tensor& in)->Tensor
        {
            auto vec = in.host();
            for(auto& v : vec)
                v = std::exp(-2.f*v*v);
            return backend.createTensor(vec, in.shape());
        });
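    //e.g. exp(-2*0^2) = 1, exp(-2*0.5^2) ~= 0.61, exp(-2*1^2) ~= 0.14, so the
    //kernel rewards weights that closely match the incoming signal.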
    //Gradient-like update vector for the STDP rule
    backend.addAlgorithm<SigmoidForward>("STDPVector",
        [&backend](const Tensor& in)->Tensor
        {
            auto vec = in.host();
            for(auto& v : vec)
                v = (v<0)?-0.06f:0.2f;
            return backend.createTensor(vec, in.shape());
        });
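    //Assuming the optimizer applies w -= alpha*grad, this pulls each weight
    //toward the input asymmetrically: a weight above the input (diff > 0) is
    //pushed down by 0.2*alpha, while one below it is pushed up by 0.06*alpha.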
    //Gaussian comparison followed by thresholding
    backend.addAlgorithm<SigmoidForward>("activate",
        [&backend](const Tensor& in)->Tensor
        {
            auto vec = in.host();
            for(auto& v : vec)
            {
                float tmp = std::exp(-2.f*v*v);//Gaussian
                v = tmp > 0.35f ? tmp : -0.1f*tmp;//threshold the comparison
            }
            return backend.createTensor(vec, in.shape());
        });
    At::SequentialNetwork net(&backend);

    auto dataset = mnist::read_dataset<std::vector, std::vector, uint8_t, uint8_t>("../mnist");
    At::Tensor trainingImage = imagesToTensor(dataset.training_images, backend);
    At::Tensor trainingLabels = labelsToOnehot(dataset.training_labels, backend);
    At::Tensor testingImage = imagesToTensor(dataset.test_images, backend);
    At::Tensor testingLabels = labelsToOnehot(dataset.test_labels, backend);

    net.add(At::InputLayer(At::Shape({Shape::None, 28*28})));
    net.add(STDP(30));
    net.add(At::TanhLayer(&backend));
    //A fully connected layer is used to perform the classification
    net.add(At::FullyConnectedLayer(10));
    net.add(At::SigmoidLayer());
    net.compile();
    net.summary();
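    //Since STDP::backword is a no-op, gradients do not pass through the STDP
    //layer; in effect only the FullyConnectedLayer classifier learns from the
    //MSE loss, while the STDP weights adapt unsupervised inside forward().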
    At::SGDOptimizer opt;
    opt.alpha_ = 0.1f;
    At::MSELoss loss;

    size_t epoch = 10;
    size_t batchSize = 16;
    int count = 0;
    auto onBatch = [&](float l)
    {
        int sampleNum = trainingImage.shape()[0];
        std::cout << count << "/" << sampleNum << "\r" << std::flush;
        count += batchSize;
    };
    auto onEpoch = [&](float l)
    {
        std::cout << "Epoch Loss: " << l << std::endl;
        count = 0;
    };
    high_resolution_clock::time_point t1 = high_resolution_clock::now();
    net.fit(opt, loss, trainingImage, trainingLabels, batchSize, epoch, onBatch, onEpoch);
    high_resolution_clock::time_point t2 = high_resolution_clock::now();
    duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
    std::cout << "It took me " << time_span.count() << " seconds." << std::endl;
    int correct = 0;
    for(intmax_t i=0;i<testingImage.shape()[0];i++)
    {
        At::Tensor x = testingImage.slice({i}, {1});
        At::Tensor res = net.predict(x);
        int predictLabel = maxElementIndex(res.host());
        if(predictLabel == dataset.test_labels[i])
            correct++;
    }
    std::cout << "Accuracy: " << correct/(float)testingImage.shape()[0] << std::endl;
}