xiejw/gist:054a76d654a8d1ffef88c30d33d4e7a8

## gistfile1.txt
import TensorFlow
import Python

// Handle to Python's NumPy module, obtained through Swift's Python interop.
// `readFile` uses it (`np.frombuffer`, `np.uint8`) to view raw file bytes.
let np = Python.import("numpy")

/// Reads the whole file at `filename` and returns its contents as bytes.
///
/// The file is opened in binary mode through Python's built-in `open`, its
/// contents are viewed as a `uint8` NumPy array via `np.frombuffer`, and that
/// array is converted into a Swift array.
///
/// - Parameter filename: Path passed unchanged to Python's `open`.
/// - Returns: The bytes of the file, in file order.
/// - Note: Traps with a message naming the file if the NumPy array cannot be
///   converted to `[UInt8]`.
func readFile(_ filename: String) -> [UInt8] {
    let file = Python.open(filename, "rb")
    // Close the handle explicitly instead of waiting for Python to collect it.
    defer { _ = file.close() }

    let contents = file.read()
    guard let bytes = [UInt8](numpyArray: np.frombuffer(contents, dtype: np.uint8)) else {
        fatalError("Could not convert the contents of \(filename) to [UInt8].")
    }
    return bytes
}

/// Loads MNIST images and labels from the given files and returns them as tensors.
///
/// The first 16 bytes of the image file and the first 8 bytes of the label file
/// are skipped (presumably the IDX headers; confirm against the file format).
/// Every remaining image byte becomes a `Float` divided by 255, and every
/// remaining label byte becomes an `Int32`.
///
/// - Parameters:
///   - imagesFile: Path of the image file.
///   - labelsFile: Path of the label file.
/// - Returns: `images` with shape `[labelCount, imageByteCount / labelCount]`
///   and `labels` holding one entry per label byte. Both are passed through
///   `toAccelerator()` before being returned.
func readMNIST(imagesFile: String, labelsFile: String) ->
        (images: Tensor<Float>, labels: Tensor<Int32>) {
    let imageHeaderByteCount = 16
    let labelHeaderByteCount = 8

    print("Reading data.")

    // Read the image file first, then the label file.
    let pixelBytes = readFile(imagesFile).dropFirst(imageHeaderByteCount)
    let labelBytes = readFile(labelsFile).dropFirst(labelHeaderByteCount)
    let pixels = pixelBytes.map { Float($0) }
    let digits = labelBytes.map { Int32($0) }

    // One row per label; the row width is whatever the pixel count divides into.
    let exampleCount = Int32(digits.count)
    let pixelsPerExample = Int32(pixels.count) / exampleCount

    print("Constructing data tensors.")
    let scaledImages = Tensor(shape: [exampleCount, pixelsPerExample], scalars: pixels) / 255
    let digitTensor = Tensor(digits)
    return (scaledImages.toAccelerator(), digitTensor.toAccelerator())
}


/// A dense layer that computes `matmul(input, weight) + bias`.
struct LinearLayer: ParameterGroup, Differentiable {
    /// Weight matrix, created with shape `[inputSize, outputSize]`.
    var weight: Tensor<Float>
    /// Bias row, created with shape `[1, outputSize]` and filled with zeros.
    var bias: Tensor<Float>

    /// Creates a layer with normally distributed random weights and zero bias.
    ///
    /// - Parameters:
    ///   - inputWidth: Number of input features.
    ///   - outputWidth: Number of output features.
    public init(from inputWidth: Int32, to outputWidth: Int32) {
        self.weight = Tensor<Float>(randomNormal: [inputWidth, outputWidth])
        self.bias = Tensor<Float>(zeros: [1, outputWidth])
    }

    /// Applies the layer to `activations` and returns the result.
    func forward(_ activations: Tensor<Float>) -> Tensor<Float> {
        let projected = matmul(activations, weight)
        return projected + bias
    }
}

/// A toy MNIST classifier: a 784 → 30 layer followed by a sigmoid, then a
/// 30 → 10 layer followed by a softmax.
struct Model: ParameterGroup, Differentiable {
    var hiddenLayer = LinearLayer(from: 784, to: 30)
    var outputLayer = LinearLayer(from: 30, to: 10)

    /// Runs the forward pass and returns the softmax output for `image`.
    func prediction(input image: Tensor<Float>) -> Tensor<Float> {
        let hiddenActivations = sigmoid(hiddenLayer.forward(image))
        return softmax(outputLayer.forward(hiddenActivations))
    }

    /// Returns the negated sum of `label * log(pred)` divided by `pred.shape[0]`.
    ///
    /// - Parameters:
    ///   - pred: Model output, as produced by `prediction(input:)`.
    ///   - label: Targets multiplied element-wise with `log(pred)`.
    func loss(modelOutput pred: Tensor<Float>, target label: Tensor<Float>) -> Tensor<Float> {
        let exampleCount = Tensor(Float(pred.shape[0]))
        // NOTE(review): `log(pred)` is not guarded against entries equal to zero.
        let weightedLogs = label * log(pred)
        let negatedTotal = Tensor(Float(-1)) * weightedLogs.sum()
        return negatedTotal / exampleCount
    }
}

   /// Counts the rows of `pred` whose arg-max along axis 1 equals the matching
   /// entry of `label`.
   ///
   /// - Parameters:
   ///   - pred: Model output; the arg-max is taken along axis 1.
   ///   - label: Expected class index for each row.
   /// - Returns: The number of matching rows. Traps if the summed tensor cannot
   ///   be converted to an `Int32`.
   func numCorrectPredictions(modelOutput pred: Tensor<Float>, target label: Tensor<Int32>) -> Int32 {
       let predictedClasses = pred.argmax(squeezingAxis: 1)
       // Element-wise equality, converted to Int32 so the matches can be summed.
       let matches = Tensor<Int32>(predictedClasses .== label)
       // TODO(xiejw): What's the S4TF way to reduce here?
       let matchCount = matches.sum()
       return Int32(matchCount)!
   }


/// Trains the MNIST classifier with mini-batch gradient descent and prints the
/// mean loss and accuracy after every epoch.
///
/// Training data is read from `train-images-idx3-ubyte` and
/// `train-labels-idx1-ubyte`. Mini-batches are taken sequentially, and examples
/// beyond the last full mini-batch are dropped.
///
/// Loss and accuracy come from a separate forward pass made with the parameters
/// as they are before each update. This costs one extra forward pass per
/// mini-batch but keeps the bookkeeping out of the differentiated closure.
///
/// - Parameter epochCount: Number of passes over the training set.
func train(epochCount: Int32) {
  var model = Model()
  // Get training data.
  let (images, numericLabels) = readMNIST(imagesFile: "train-images-idx3-ubyte",
                                          labelsFile: "train-labels-idx1-ubyte")
  let labels = Tensor<Float>(oneHotAtIndices: numericLabels, depth: 10)
  let exampleCount = Int32(images.shape[0])

  print(labels.shape)

  // Hyper-parameters.
  let miniBatchSize: Int32 = 10
  let learningRate: Float = 0.2

  // Training loop.
  let iterationCount = exampleCount / miniBatchSize
  print("Begin training for \(epochCount) epochs.")
  print("Mini batch size is \(miniBatchSize)")
  print("Each epoch has \(iterationCount) iterations (final partial batch may be dropped).")

  /// Returns the `index`-th run of `miniBatchSize` consecutive rows of `x`.
  func minibatch<Scalar>(_ x: Tensor<Scalar>, index: Int32) -> Tensor<Scalar> {
    let start = index * miniBatchSize
    return x[start..<start+miniBatchSize]
  }

  for epoch in 0..<epochCount {
    // Running totals for this epoch, used to print loss and accuracy.
    var correctGuesses: Int32 = 0
    var totalGuesses = 0
    var totalLoss = Float(0)

    // TODO: Randomly sample minibatches using TensorFlow dataset APIs.
    for i in 0..<iterationCount {
      let batchImages = minibatch(images, index: i)
      let batchNumericLabels = minibatch(numericLabels, index: i)
      let batchLabels = minibatch(labels, index: i)

      // Collect metrics outside the gradient closure so the counters are
      // actually updated; previously they stayed at zero for the whole run.
      let metricsPrediction = model.prediction(input: batchImages)
      correctGuesses += numCorrectPredictions(
        modelOutput: metricsPrediction, target: batchNumericLabels)
      let batchLoss = model.loss(modelOutput: metricsPrediction, target: batchLabels)
      // `loss` divides by the batch size; scale it back so that dividing by
      // `totalGuesses` below yields the mean loss per example.
      totalLoss += batchLoss.scalarized() * Float(miniBatchSize)
      totalGuesses += Int(miniBatchSize)

      let gradients = gradient(at: model) { model -> Tensor<Float> in
        let prediction = model.prediction(input: batchImages)
        return model.loss(modelOutput: prediction, target: batchLabels)
      }

      // Plain gradient-descent step.
      model = model.moved(
        along: -learningRate * model.tangentVector(from: gradients))
    }

    print("""
          Epoch \(epoch + 1): loss: \(totalLoss / Float(totalGuesses)); \
          accuracy: \(correctGuesses)/\(totalGuesses) \
          (\(Float(correctGuesses) / Float(totalGuesses)))
          """)
  }
}

// Script entry point: train for a single epoch.
train(epochCount: 1)
// Longer 20-epoch run, currently disabled:
// train(epochCount: 20)
	import TensorFlow
	import Python

	// Handle to Python's NumPy module, obtained through Swift's Python interop.
	// `readFile` uses it (`np.frombuffer`, `np.uint8`) to view raw file bytes.
	let np = Python.import("numpy")

	/// Reads the whole file at `filename` and returns its contents as bytes.
	///
	/// The file is opened in binary mode through Python's built-in `open`, its
	/// contents are viewed as a `uint8` NumPy array via `np.frombuffer`, and that
	/// array is converted into a Swift array.
	///
	/// - Parameter filename: Path passed unchanged to Python's `open`.
	/// - Returns: The bytes of the file, in file order.
	/// - Note: Traps with a message naming the file if the NumPy array cannot be
	///   converted to `[UInt8]`.
	func readFile(_ filename: String) -> [UInt8] {
		let file = Python.open(filename, "rb")
		// Close the handle explicitly instead of waiting for Python to collect it.
		defer { _ = file.close() }

		let contents = file.read()
		guard let bytes = [UInt8](numpyArray: np.frombuffer(contents, dtype: np.uint8)) else {
			fatalError("Could not convert the contents of \(filename) to [UInt8].")
		}
		return bytes
	}

	/// Loads MNIST images and labels from the given files and returns them as tensors.
	///
	/// The first 16 bytes of the image file and the first 8 bytes of the label file
	/// are skipped (presumably the IDX headers; confirm against the file format).
	/// Every remaining image byte becomes a `Float` divided by 255, and every
	/// remaining label byte becomes an `Int32`.
	///
	/// - Parameters:
	///   - imagesFile: Path of the image file.
	///   - labelsFile: Path of the label file.
	/// - Returns: `images` with shape `[labelCount, imageByteCount / labelCount]`
	///   and `labels` holding one entry per label byte. Both are passed through
	///   `toAccelerator()` before being returned.
	func readMNIST(imagesFile: String, labelsFile: String) ->
		(images: Tensor<Float>, labels: Tensor<Int32>) {
		let imageHeaderByteCount = 16
		let labelHeaderByteCount = 8

		print("Reading data.")

		// Read the image file first, then the label file.
		let pixelBytes = readFile(imagesFile).dropFirst(imageHeaderByteCount)
		let labelBytes = readFile(labelsFile).dropFirst(labelHeaderByteCount)
		let pixels = pixelBytes.map { Float($0) }
		let digits = labelBytes.map { Int32($0) }

		// One row per label; the row width is whatever the pixel count divides into.
		let exampleCount = Int32(digits.count)
		let pixelsPerExample = Int32(pixels.count) / exampleCount

		print("Constructing data tensors.")
		let scaledImages = Tensor(shape: [exampleCount, pixelsPerExample], scalars: pixels) / 255
		let digitTensor = Tensor(digits)
		return (scaledImages.toAccelerator(), digitTensor.toAccelerator())
	}


	/// A dense layer that computes `matmul(input, weight) + bias`.
	struct LinearLayer: ParameterGroup, Differentiable {
		/// Weight matrix, created with shape `[inputSize, outputSize]`.
		var weight: Tensor<Float>
		/// Bias row, created with shape `[1, outputSize]` and filled with zeros.
		var bias: Tensor<Float>

		/// Creates a layer with normally distributed random weights and zero bias.
		///
		/// - Parameters:
		///   - inputWidth: Number of input features.
		///   - outputWidth: Number of output features.
		public init(from inputWidth: Int32, to outputWidth: Int32) {
			self.weight = Tensor<Float>(randomNormal: [inputWidth, outputWidth])
			self.bias = Tensor<Float>(zeros: [1, outputWidth])
		}

		/// Applies the layer to `activations` and returns the result.
		func forward(_ activations: Tensor<Float>) -> Tensor<Float> {
			let projected = matmul(activations, weight)
			return projected + bias
		}
	}

	/// A toy MNIST classifier: a 784 → 30 layer followed by a sigmoid, then a
	/// 30 → 10 layer followed by a softmax.
	struct Model: ParameterGroup, Differentiable {
		var hiddenLayer = LinearLayer(from: 784, to: 30)
		var outputLayer = LinearLayer(from: 30, to: 10)

		/// Runs the forward pass and returns the softmax output for `image`.
		func prediction(input image: Tensor<Float>) -> Tensor<Float> {
			let hiddenActivations = sigmoid(hiddenLayer.forward(image))
			return softmax(outputLayer.forward(hiddenActivations))
		}

		/// Returns the negated sum of `label * log(pred)` divided by `pred.shape[0]`.
		///
		/// - Parameters:
		///   - pred: Model output, as produced by `prediction(input:)`.
		///   - label: Targets multiplied element-wise with `log(pred)`.
		func loss(modelOutput pred: Tensor<Float>, target label: Tensor<Float>) -> Tensor<Float> {
			let exampleCount = Tensor(Float(pred.shape[0]))
			// NOTE(review): `log(pred)` is not guarded against entries equal to zero.
			let weightedLogs = label * log(pred)
			let negatedTotal = Tensor(Float(-1)) * weightedLogs.sum()
			return negatedTotal / exampleCount
		}
	}

	/// Counts the rows of `pred` whose arg-max along axis 1 equals the matching
	/// entry of `label`.
	///
	/// - Parameters:
	///   - pred: Model output; the arg-max is taken along axis 1.
	///   - label: Expected class index for each row.
	/// - Returns: The number of matching rows. Traps if the summed tensor cannot
	///   be converted to an `Int32`.
	func numCorrectPredictions(modelOutput pred: Tensor<Float>, target label: Tensor<Int32>) -> Int32 {
		let predictedClasses = pred.argmax(squeezingAxis: 1)
		// Element-wise equality, converted to Int32 so the matches can be summed.
		let matches = Tensor<Int32>(predictedClasses .== label)
		// TODO(xiejw): What's the S4TF way to reduce here?
		let matchCount = matches.sum()
		return Int32(matchCount)!
	}


	/// Trains the MNIST classifier with mini-batch gradient descent and prints the
	/// mean loss and accuracy after every epoch.
	///
	/// Training data is read from `train-images-idx3-ubyte` and
	/// `train-labels-idx1-ubyte`. Mini-batches are taken sequentially, and examples
	/// beyond the last full mini-batch are dropped.
	///
	/// Loss and accuracy come from a separate forward pass made with the parameters
	/// as they are before each update. This costs one extra forward pass per
	/// mini-batch but keeps the bookkeeping out of the differentiated closure.
	///
	/// - Parameter epochCount: Number of passes over the training set.
	func train(epochCount: Int32) {
		var model = Model()
		// Get training data.
		let (images, numericLabels) = readMNIST(imagesFile: "train-images-idx3-ubyte",
			labelsFile: "train-labels-idx1-ubyte")
		let labels = Tensor<Float>(oneHotAtIndices: numericLabels, depth: 10)
		let exampleCount = Int32(images.shape[0])

		print(labels.shape)

		// Hyper-parameters.
		let miniBatchSize: Int32 = 10
		let learningRate: Float = 0.2

		// Training loop.
		let iterationCount = exampleCount / miniBatchSize
		print("Begin training for \(epochCount) epochs.")
		print("Mini batch size is \(miniBatchSize)")
		print("Each epoch has \(iterationCount) iterations (final partial batch may be dropped).")

		/// Returns the `index`-th run of `miniBatchSize` consecutive rows of `x`.
		func minibatch<Scalar>(_ x: Tensor<Scalar>, index: Int32) -> Tensor<Scalar> {
			let start = index * miniBatchSize
			return x[start..<start+miniBatchSize]
		}

		for epoch in 0..<epochCount {
			// Running totals for this epoch, used to print loss and accuracy.
			var correctGuesses: Int32 = 0
			var totalGuesses = 0
			var totalLoss = Float(0)

			// TODO: Randomly sample minibatches using TensorFlow dataset APIs.
			for i in 0..<iterationCount {
				let batchImages = minibatch(images, index: i)
				let batchNumericLabels = minibatch(numericLabels, index: i)
				let batchLabels = minibatch(labels, index: i)

				// Collect metrics outside the gradient closure so the counters are
				// actually updated; previously they stayed at zero for the whole run.
				let metricsPrediction = model.prediction(input: batchImages)
				correctGuesses += numCorrectPredictions(
					modelOutput: metricsPrediction, target: batchNumericLabels)
				let batchLoss = model.loss(modelOutput: metricsPrediction, target: batchLabels)
				// `loss` divides by the batch size; scale it back so that dividing by
				// `totalGuesses` below yields the mean loss per example.
				totalLoss += batchLoss.scalarized() * Float(miniBatchSize)
				totalGuesses += Int(miniBatchSize)

				let gradients = gradient(at: model) { model -> Tensor<Float> in
					let prediction = model.prediction(input: batchImages)
					return model.loss(modelOutput: prediction, target: batchLabels)
				}

				// Plain gradient-descent step.
				model = model.moved(
					along: -learningRate * model.tangentVector(from: gradients))
			}

			print("""
				Epoch \(epoch + 1): loss: \(totalLoss / Float(totalGuesses)); \
				accuracy: \(correctGuesses)/\(totalGuesses) \
				(\(Float(correctGuesses) / Float(totalGuesses)))
				""")
		}
	}

	// Script entry point: train for a single epoch.
	train(epochCount: 1)
	// Longer 20-epoch run, currently disabled:
	// train(epochCount: 20)