Created
January 16, 2019 17:34
-
-
Save xiejw/054a76d654a8d1ffef88c30d33d4e7a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import TensorFlow | |
import Python | |
/// Handle to the Python `numpy` module, used below to reinterpret raw bytes as `uint8` values.
let np = Python.import("numpy")

/// Reads the entire contents of a file as raw bytes.
///
/// The file is opened in binary mode through `Python.open`, and the bytes are
/// converted to a Swift array via `numpy.frombuffer` with dtype `uint8`.
///
/// - Parameter filename: Path of the file to read.
/// - Returns: The file's contents, one `UInt8` per byte.
func readFile(_ filename: String) -> [UInt8] {
    let file = Python.open(filename, "rb")
    // Close the handle explicitly rather than leaving it open until the Python
    // runtime happens to reclaim the file object.
    defer { file.close() }

    let rawBytes = file.read()
    // `Array(numpyArray:)` is failable; fail with a message that names the file
    // instead of an anonymous force-unwrap crash.
    guard let contents = [UInt8](numpyArray: np.frombuffer(rawBytes, dtype: np.uint8)) else {
        fatalError("Could not convert the contents of \(filename) to [UInt8].")
    }
    return contents
}
/// Reads MNIST images and labels from the specified file paths.
///
/// The first 16 bytes of the images file and the first 8 bytes of the labels
/// file are skipped (the headers of the standard MNIST IDX files). Each remaining
/// image byte is converted to `Float` and divided by 255; each remaining label
/// byte is converted to `Int32`.
///
/// - Parameters:
///   - imagesFile: Path to the images file.
///   - labelsFile: Path to the labels file.
/// - Returns: A tuple of an images tensor with shape `[labelCount, scalarsPerImage]`
///   and a labels tensor with one entry per label, both moved to the accelerator.
/// - Precondition: The labels file contains at least one label, and the number of
///   image scalars is an exact multiple of the number of labels.
func readMNIST(imagesFile: String, labelsFile: String) ->
    (images: Tensor<Float>, labels: Tensor<Int32>) {
    // Header sizes (in bytes) that precede the payload in each file.
    let imageHeaderByteCount = 16
    let labelHeaderByteCount = 8

    print("Reading data.")
    let images = readFile(imagesFile).dropFirst(imageHeaderByteCount).map { Float($0) }
    let labels = readFile(labelsFile).dropFirst(labelHeaderByteCount).map { Int32($0) }

    // Guard the division below: an empty labels file would otherwise trap with
    // an uninformative divide-by-zero.
    precondition(!labels.isEmpty, "No labels found in \(labelsFile).")
    precondition(images.count % labels.count == 0,
                 "Image data in \(imagesFile) does not divide evenly among \(labels.count) labels.")

    let rowCount = Int32(labels.count)
    let columnCount = Int32(images.count) / rowCount

    print("Constructing data tensors.")
    let imagesTensor = Tensor(shape: [rowCount, columnCount], scalars: images) / 255
    let labelsTensor = Tensor(labels)
    return (imagesTensor.toAccelerator(), labelsTensor.toAccelerator())
}
/// A dense (fully connected) layer that computes `matmul(input, weight) + bias`.
struct LinearLayer: ParameterGroup, Differentiable {
    /// Weight matrix, created with shape `[inputSize, outputSize]`.
    var weight: Tensor<Float>
    /// Bias row, created with shape `[1, outputSize]`.
    var bias: Tensor<Float>

    /// Creates a layer with normally distributed random weights and a zero bias.
    ///
    /// - Parameters:
    ///   - inputSize: Number of input features.
    ///   - outputSize: Number of output features.
    public init(from inputSize: Int32, to outputSize: Int32) {
        self.weight = Tensor<Float>(randomNormal: [inputSize, outputSize])
        self.bias = Tensor<Float>(zeros: [1, outputSize])
    }

    /// Applies the layer to `input`.
    ///
    /// - Parameter input: The tensor to multiply by `weight`.
    /// - Returns: The product with `bias` added.
    func forward(_ input: Tensor<Float>) -> Tensor<Float> {
        let projected = matmul(input, weight)
        return projected + bias
    }
}
/// A toy two-layer model for the MNIST dataset: 784 inputs, 30 hidden units, 10 outputs.
struct Model: ParameterGroup, Differentiable {
    /// First layer, mapping 784 inputs to 30 hidden units.
    var hiddenLayer = LinearLayer(from: 784, to: 30)
    /// Second layer, mapping 30 hidden units to 10 outputs.
    var outputLayer = LinearLayer(from: 30, to: 10)

    /// Runs the forward pass: hidden layer, sigmoid, output layer, softmax.
    ///
    /// - Parameter image: Input tensor fed to the hidden layer.
    /// - Returns: The softmax of the output layer's result.
    func prediction(input image: Tensor<Float>) -> Tensor<Float> {
        let hiddenActivation = sigmoid(hiddenLayer.forward(image))
        return softmax(outputLayer.forward(hiddenActivation))
    }

    /// Computes the negated sum of `label * log(pred)`, divided by the size of
    /// the first dimension of `pred`.
    ///
    /// - Parameters:
    ///   - pred: Model output, as returned by `prediction(input:)`.
    ///   - label: Target tensor, multiplied element-wise with `log(pred)`.
    /// - Returns: The resulting loss tensor.
    func loss(modelOutput pred: Tensor<Float>, target label: Tensor<Float>) -> Tensor<Float> {
        let exampleCount = Tensor(Float(pred.shape[0]))
        let weightedLogSum = (label * log(pred)).sum()
        // Negate by multiplying with -1, then average over the first dimension.
        let negatedSum = Tensor(Float(-1)) * weightedLogSum
        return negatedSum / exampleCount
    }
}
/// Counts how many predictions match their target label.
///
/// The predicted class of each row is the index of the largest value along
/// axis 1 of `pred`; it is compared element-wise with `label`.
///
/// - Parameters:
///   - pred: Model output with one row of scores per example.
///   - label: Target class index for each example.
/// - Returns: The number of positions where the predicted class equals the label.
func numCorrectPredictions(modelOutput pred: Tensor<Float>, target label: Tensor<Int32>) -> Int32 {
    let predictedClasses = pred.argmax(squeezingAxis: 1)
    // Convert the boolean match mask to 0/1 so it can be summed.
    let matches = Tensor<Int32>(predictedClasses .== label)
    // `sum()` reduces to a single-element tensor; `scalarized()` extracts that
    // value directly, avoiding a force-unwrapped failable conversion.
    return matches.sum().scalarized()
}
/// Trains a MNIST classifier for the specified number of epochs.
///
/// Reads the training set from `train-images-idx3-ubyte` and
/// `train-labels-idx1-ubyte`, then runs mini-batch gradient descent over the
/// data in file order. After each epoch it prints the average loss and the
/// accuracy measured over that epoch's mini-batches.
///
/// - Parameter epochCount: Number of passes over the training data.
func train(epochCount: Int32) {
    var model = Model()

    // Get training data.
    let (images, numericLabels) = readMNIST(imagesFile: "train-images-idx3-ubyte",
                                            labelsFile: "train-labels-idx1-ubyte")
    let labels = Tensor<Float>(oneHotAtIndices: numericLabels, depth: 10)
    print(labels.shape)

    // Hyper-parameters.
    let miniBatchSize: Int32 = 10
    let learningRate: Float = 0.2

    // Training loop. Integer division drops a trailing partial batch.
    let iterationCount = Int32(images.shape[0]) / miniBatchSize
    print("Begin training for \(epochCount) epochs.")
    print("Mini batch size is \(miniBatchSize)")
    print("Each epoch has \(iterationCount) iterations (final partial batch may be dropped).")

    /// Returns the `index`-th slice of `miniBatchSize` consecutive entries of `x`.
    func minibatch<Scalar>(_ x: Tensor<Scalar>, index: Int32) -> Tensor<Scalar> {
        let start = index * miniBatchSize
        return x[start..<start+miniBatchSize]
    }

    for epoch in 0..<epochCount {
        // Running totals for this epoch, used to print loss and accuracy.
        var correctGuesses: Int32 = 0
        var totalGuesses = 0
        var totalLoss = Float(0)

        // TODO: Randomly sample minibatches using TensorFlow dataset APIs.
        for i in 0..<iterationCount {
            let batchImages = minibatch(images, index: i)
            let batchNumericLabels = minibatch(numericLabels, index: i)
            let batchLabels = minibatch(labels, index: i)

            // Metrics come from a separate forward pass made before the update,
            // so the differentiated closure below stays free of side effects.
            // Previously these totals were never updated and every epoch
            // reported a loss and accuracy of zero.
            let batchPrediction = model.prediction(input: batchImages)
            let batchLoss = model.loss(modelOutput: batchPrediction, target: batchLabels)
            correctGuesses += numCorrectPredictions(modelOutput: batchPrediction,
                                                    target: batchNumericLabels)
            // `loss` is a per-example average; scale it back to a batch total so
            // the epoch average below is weighted by example count.
            totalLoss += batchLoss.scalarized() * Float(miniBatchSize)
            totalGuesses += Int(miniBatchSize)

            let gradients = gradient(at: model) { model -> Tensor<Float> in
                let prediction = model.prediction(input: batchImages)
                return model.loss(modelOutput: prediction, target: batchLabels)
            }
            // Step against the gradient, scaled by the learning rate.
            model = model.moved(
                along: -learningRate * model.tangentVector(from: gradients))
        }

        print("""
        Epoch \(epoch + 1): loss: \(totalLoss / Float(totalGuesses)); \
        accuracy: \(correctGuesses)/\(totalGuesses) \
        (\(Float(correctGuesses) / Float(totalGuesses)))
        """)
    }
}
// Script entry point: run a single training epoch.
// Pass a larger `epochCount` (for example 20) for a longer training run.
train(epochCount: 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment