-
-
Save tunabrain/aad151810a7b10197ff1e62f0be98c44 to your computer and use it in GitHub Desktop.
Deep MNIST classifier in 200 lines of C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compile using | |
// g++ -isystem PATH_TO_EIGEN_3 neural.cpp -std=c++11 -O3 -o neural | |
#include <Eigen/Dense> | |
#include <iostream> | |
#include <cstdint> | |
#include <fstream> | |
#include <memory> | |
#include <vector> | |
#include <array> | |
using namespace Eigen; | |
using std::uint32_t; | |
using std::uint8_t; | |
typedef DiagonalMatrix<float, -1, -1> DiagonalXf; | |
// One MNIST sample: the raw 28x28 grayscale image plus its digit label.
struct MnistImage {
    std::array<uint8_t, 28*28> img; // row-major pixel intensities, 0-255
    int label;                      // ground-truth digit, 0-9
};
std::vector<MnistImage> loadMnist(const char *labelPath, const char *imagePath); | |
// Elementwise logistic function: sigmoid(t) = 1 / (1 + e^-t).
VectorXf sigmoid(const VectorXf &t) {
    ArrayXf denom = 1.0f + (-t.array()).exp();
    return denom.inverse().matrix();
}
// Derivative of the elementwise sigmoid, returned as a diagonal matrix
// (Jacobian of sigmoid applied componentwise).
// Computed as s*(1-s) with s = sigmoid(t). The previous form
// exp(t)/((1+exp(t))^2) overflows for large positive t (float exp(t)
// becomes inf, and inf/inf yields NaN); this form saturates cleanly to 0
// for large |t|.
DiagonalXf sigmoidDeriv(const VectorXf &t) {
    ArrayXf s = 1.0f/(1.0f + (-t.array()).exp());
    return VectorXf(s*(1.0f - s)).asDiagonal();
}
struct NeuralLayer | |
{ | |
MatrixXf w, wTmp; | |
NeuralLayer(int inputs, int outputs) | |
: w(MatrixXf::Random(outputs, inputs)), | |
wTmp(MatrixXf::Zero(outputs, inputs)) | |
{ | |
} | |
VectorXf eval(const VectorXf &x) const { | |
return sigmoid(w*x); | |
} | |
VectorXf propagate(const VectorXf &x, const VectorXf &d) { | |
VectorXf deriv = sigmoidDeriv(w*x)*d; | |
wTmp -= deriv*x.transpose(); | |
return deriv.transpose()*w; | |
} | |
void applyGradient() { | |
w += wTmp; | |
wTmp = MatrixXf::Zero(w.rows(), w.cols()); | |
} | |
}; | |
struct NeuralNetwork | |
{ | |
std::vector<NeuralLayer> layers; | |
NeuralNetwork(int inputs, int outputs, int hiddenNodes, int hiddenLayers) { | |
// Terminology here isn't quite right but this is a bit more convenient | |
layers.emplace_back(NeuralLayer(inputs, hiddenNodes)); | |
for (int i = 0; i < hiddenLayers; ++i) | |
layers.emplace_back(NeuralLayer(hiddenNodes, hiddenNodes)); | |
layers.emplace_back(NeuralLayer(hiddenNodes, outputs)); | |
} | |
VectorXf eval(VectorXf x) const { | |
for (size_t i = 0; i < layers.size(); ++i) | |
x = layers[i].eval(x); | |
return x; | |
} | |
void propagate(const VectorXf &x, VectorXf d) { | |
std::vector<VectorXf> inputs{{x}}; | |
for (size_t i = 0; i < layers.size() - 1; ++i) | |
inputs.emplace_back(layers[i].eval(inputs.back())); | |
for (int i = layers.size() - 1; i >= 0; --i) | |
d = layers[i].propagate(inputs[i], d); | |
} | |
void applyGradient() { | |
for (auto &l : layers) | |
l.applyGradient(); | |
} | |
}; | |
int main() { | |
auto mnist = loadMnist("nn/train-labels-idx1-ubyte", "nn/train-images-idx3-ubyte"); | |
auto test = loadMnist("nn/t10k-labels-idx1-ubyte", "nn/t10k-images-idx3-ubyte"); | |
NeuralNetwork nn(28*28, 10, 100, 6); | |
const int BatchSize = 100; | |
const float Eta = 1.0f/BatchSize; | |
int updates = 0; | |
for (int iter = 0; iter < 6; ++iter) { | |
std::vector<int> trainSequence(mnist.size()); | |
for (size_t i = 0; i < trainSequence.size(); ++i) | |
trainSequence[i] = i; | |
std::random_shuffle(trainSequence.begin(), trainSequence.end()); | |
int incorrect = 0; | |
for (auto k : trainSequence) { | |
VectorXf x(28*28); | |
for (int j = 0; j < 28*28; ++j) | |
x[j] = mnist[k].img[j]/255.0f; | |
VectorXf target = VectorXf::Zero(10); | |
target[mnist[k].label] = 1.0f; | |
VectorXf value = nn.eval(x); | |
VectorXf delta = value - target; | |
nn.propagate(x, 2.0f*delta*Eta); | |
VectorXf::Index classification; | |
value.maxCoeff(&classification); | |
if (classification != mnist[k].label) | |
incorrect++; | |
updates++; | |
if (updates == BatchSize) { | |
nn.applyGradient(); | |
updates = 0; | |
} | |
} | |
std::cout << "Iteration " << iter << " Training error: " << incorrect*100.0f/mnist.size() << "%" << std::endl; | |
} | |
int testIncorrect = 0; | |
for (size_t i = 0; i < test.size(); ++i) { | |
VectorXf x(28*28); | |
for (int j = 0; j < 28*28; ++j) | |
x[j] = test[i].img[j]/255.0f; | |
VectorXf::Index idx; | |
nn.eval(x).maxCoeff(&idx); | |
if (idx != test[i].label) | |
testIncorrect++; | |
} | |
std::cout << "Test error: " << testIncorrect*100.0f/test.size() << "%" << std::endl; | |
return 0; | |
} | |
// Reads a 32-bit big-endian unsigned integer from the stream.
// Composes the value from individual bytes with shifts, which is portable
// on any host endianness. (The previous version read the inactive member
// of a union — undefined behavior in C++ — and its unconditional byte
// swap assumed a little-endian host.)
uint32_t readIntBigEndian(std::istream &in) {
    unsigned char b[4] = {};
    in.read(reinterpret_cast<char *>(b), 4);
    return (uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16) |
           (uint32_t(b[2]) <<  8) |  uint32_t(b[3]);
}
std::vector<MnistImage> loadMnist(const char *labelPath, const char *imagePath) { | |
std::vector<MnistImage> result; | |
std::ifstream labelFile(labelPath, std::ios_base::binary); | |
std::ifstream imageFile(imagePath, std::ios_base::binary); | |
if (!labelFile.good() || !imageFile.good()) | |
return result; | |
readIntBigEndian(labelFile); | |
uint32_t n = readIntBigEndian(labelFile); | |
auto labels = std::unique_ptr<uint8_t[]>(new uint8_t[n]); | |
labelFile.read(reinterpret_cast<char *>(labels.get()), n); | |
readIntBigEndian(imageFile); | |
n = readIntBigEndian(imageFile); | |
uint32_t w = readIntBigEndian(imageFile); | |
uint32_t h = readIntBigEndian(imageFile); | |
for (uint32_t i = 0; i < n; ++i) { | |
MnistImage digit; | |
imageFile.read(reinterpret_cast<char *>(&digit.img[0]), w*h); | |
digit.label = labels[i]; | |
result.emplace_back(digit); | |
} | |
return std::move(result); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment