@tunabrain
Last active April 17, 2020 16:07
Deep MNIST classifier in 200 lines of C++
// Compile using
// g++ -isystem PATH_TO_EIGEN_3 neural.cpp -std=c++11 -O3 -o neural
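// The program expects the four uncompressed MNIST IDX files
// (train-images-idx3-ubyte, train-labels-idx1-ubyte, t10k-images-idx3-ubyte,
// t10k-labels-idx1-ubyte) in a local nn/ directory; these paths are taken
// from main() below, so adjust them to your setup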
#include <Eigen/Dense>

#include <algorithm> // for std::random_shuffle (see note in main)
#include <iostream>
#include <cstdint>
#include <fstream>
#include <memory>
#include <vector>
#include <array>

using namespace Eigen;
using std::uint32_t;
using std::uint8_t;

typedef DiagonalMatrix<float, -1, -1> DiagonalXf;
struct MnistImage {
    std::array<uint8_t, 28*28> img;
    int label;
};
std::vector<MnistImage> loadMnist(const char *labelPath, const char *imagePath);
// Logistic sigmoid, applied elementwise
VectorXf sigmoid(const VectorXf &t) {
    return 1.0f/(1.0f + (-t.array()).exp());
}
// Elementwise derivative of the sigmoid, packed into a diagonal Jacobian
DiagonalXf sigmoidDeriv(const VectorXf &t) {
    auto exp = t.array().exp();
    return VectorXf(exp/((1.0f + exp)*(1.0f + exp))).asDiagonal();
}
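// For reference, the identity used above: with s(t) = 1/(1 + e^(-t)),
//
//     s'(t) = s(t)*(1 - s(t)) = e^t/(1 + e^t)^2,
//
// so sigmoidDeriv(t) is exactly the derivative of sigmoid(t), stored as a
// diagonal matrix so it can be chained with other Jacobians in the backward pass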
struct NeuralLayer
{
    MatrixXf w, wTmp;

    NeuralLayer(int inputs, int outputs)
    : w(MatrixXf::Random(outputs, inputs)),
      wTmp(MatrixXf::Zero(outputs, inputs))
    {
    }

    VectorXf eval(const VectorXf &x) const {
        return sigmoid(w*x);
    }

    // Backward pass: x is this layer's input, d the error gradient w.r.t.
    // the layer's output. Accumulates the (negative) weight gradient into
    // wTmp and returns the gradient w.r.t. the input
    VectorXf propagate(const VectorXf &x, const VectorXf &d) {
        VectorXf deriv = sigmoidDeriv(w*x)*d;
        wTmp -= deriv*x.transpose();
        return deriv.transpose()*w;
    }

    void applyGradient() {
        w += wTmp;
        wTmp = MatrixXf::Zero(w.rows(), w.cols());
    }
};
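// In equations: for a layer y = s(W x), propagate computes delta = s'(W x) * d,
// accumulates wTmp -= delta * x^T over the minibatch, and applyGradient then
// takes the gradient-descent step W += wTmp, i.e. W <- W - sum(delta * x^T).
// The learning rate is folded into d by the caller (the 2*delta*Eta in main())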
struct NeuralNetwork
{
    std::vector<NeuralLayer> layers;

    NeuralNetwork(int inputs, int outputs, int hiddenNodes, int hiddenLayers) {
        // Terminology here isn't quite right (this actually builds
        // hiddenLayers + 1 hidden layers), but it's a bit more convenient
        layers.emplace_back(inputs, hiddenNodes);
        for (int i = 0; i < hiddenLayers; ++i)
            layers.emplace_back(hiddenNodes, hiddenNodes);
        layers.emplace_back(hiddenNodes, outputs);
    }

    VectorXf eval(VectorXf x) const {
        for (size_t i = 0; i < layers.size(); ++i)
            x = layers[i].eval(x);
        return x;
    }

    // Backpropagation: re-run the forward pass to record each layer's input,
    // then walk the layers in reverse, chaining the error gradient through
    void propagate(const VectorXf &x, VectorXf d) {
        std::vector<VectorXf> inputs{{x}};
        for (size_t i = 0; i < layers.size() - 1; ++i)
            inputs.emplace_back(layers[i].eval(inputs.back()));
        for (int i = layers.size() - 1; i >= 0; --i)
            d = layers[i].propagate(inputs[i], d);
    }

    void applyGradient() {
        for (auto &l : layers)
            l.applyGradient();
    }
};
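// Usage sketch (not part of the original gist; sizes and names here are made
// up for illustration). The class can be driven on its own:
//
//     NeuralNetwork net(2, 1, 4, 1);
//     VectorXf in(2), target(1);
//     in << 0.0f, 1.0f; target << 1.0f;
//     VectorXf out = net.eval(in);              // forward pass
//     net.propagate(in, 2.0f*(out - target));   // accumulate squared-error gradient
//     net.applyGradient();                      // take the step, reset accumulators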
int main() {
    auto mnist = loadMnist("nn/train-labels-idx1-ubyte", "nn/train-images-idx3-ubyte");
    auto test  = loadMnist("nn/t10k-labels-idx1-ubyte",  "nn/t10k-images-idx3-ubyte");

    NeuralNetwork nn(28*28, 10, 100, 6);

    const int BatchSize = 100;
    const float Eta = 1.0f/BatchSize;

    int updates = 0;
    for (int iter = 0; iter < 6; ++iter) {
        // Visit the training set in a fresh random order each epoch.
        // Note: std::random_shuffle was removed in C++17; use std::shuffle
        // with a std::mt19937 when compiling against a newer standard
        std::vector<int> trainSequence(mnist.size());
        for (size_t i = 0; i < trainSequence.size(); ++i)
            trainSequence[i] = i;
        std::random_shuffle(trainSequence.begin(), trainSequence.end());

        int incorrect = 0;
        for (auto k : trainSequence) {
            VectorXf x(28*28);
            for (int j = 0; j < 28*28; ++j)
                x[j] = mnist[k].img[j]/255.0f;

            // One-hot target vector for the digit's label
            VectorXf target = VectorXf::Zero(10);
            target[mnist[k].label] = 1.0f;

            VectorXf value = nn.eval(x);
            VectorXf delta = value - target;
            // Gradient of the squared error, pre-scaled by the learning rate
            nn.propagate(x, 2.0f*delta*Eta);

            VectorXf::Index classification;
            value.maxCoeff(&classification);
            if (classification != mnist[k].label)
                incorrect++;

            // Apply the accumulated gradient once per minibatch
            updates++;
            if (updates == BatchSize) {
                nn.applyGradient();
                updates = 0;
            }
        }
        std::cout << "Iteration " << iter << " Training error: "
                  << incorrect*100.0f/mnist.size() << "%" << std::endl;
    }

    int testIncorrect = 0;
    for (size_t i = 0; i < test.size(); ++i) {
        VectorXf x(28*28);
        for (int j = 0; j < 28*28; ++j)
            x[j] = test[i].img[j]/255.0f;
        VectorXf::Index idx;
        nn.eval(x).maxCoeff(&idx);
        if (idx != test[i].label)
            testIncorrect++;
    }
    std::cout << "Test error: " << testIncorrect*100.0f/test.size() << "%" << std::endl;
    return 0;
}
// Read a 32-bit big-endian integer (the MNIST header format) from a stream.
// Note: this assumes a little-endian host, and the union-based type punning,
// while a common idiom, is formally undefined behavior in C++
uint32_t readIntBigEndian(std::istream &in) {
    union { char c[4]; uint32_t i; };
    in.read(c, 4);
    std::swap(c[0], c[3]);
    std::swap(c[1], c[2]);
    return i;
}
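// A portable, endianness-agnostic alternative (a sketch, not in the original):
//
//     uint32_t readIntBigEndian(std::istream &in) {
//         uint8_t b[4];
//         in.read(reinterpret_cast<char *>(b), 4);
//         return (uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16)
//              | (uint32_t(b[2]) <<  8) |  uint32_t(b[3]);
//     }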
std::vector<MnistImage> loadMnist(const char *labelPath, const char *imagePath) {
    std::vector<MnistImage> result;
    std::ifstream labelFile(labelPath, std::ios_base::binary);
    std::ifstream imageFile(imagePath, std::ios_base::binary);
    if (!labelFile.good() || !imageFile.good())
        return result;

    readIntBigEndian(labelFile); // skip magic number
    uint32_t n = readIntBigEndian(labelFile);
    auto labels = std::unique_ptr<uint8_t[]>(new uint8_t[n]);
    labelFile.read(reinterpret_cast<char *>(labels.get()), n);

    readIntBigEndian(imageFile); // skip magic number
    n          = readIntBigEndian(imageFile);
    uint32_t w = readIntBigEndian(imageFile);
    uint32_t h = readIntBigEndian(imageFile);
    for (uint32_t i = 0; i < n; ++i) {
        MnistImage digit;
        imageFile.read(reinterpret_cast<char *>(&digit.img[0]), w*h);
        digit.label = labels[i];
        result.emplace_back(digit);
    }
    return result; // no std::move: it would only inhibit copy elision
}
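For reference, the IDX headers the loader skips over: label files begin with a magic number (2049) followed by the item count; image files begin with a magic number (2051) followed by the image count, row count, and column count, all stored as 32-bit big-endian integers. The code reads and discards the magic numbers without validating them.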