////////// model_training_walkthrough.ipynb
// ========================================================
import TensorFlow
import PythonKit
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking  // URL loading lives here on Linux.
#endif

/// Downloads the file at `sourceString` and writes it to `destinationString`.
/// Uses `try!` for brevity, as in the original tutorial; a real application
/// would propagate these errors instead.
func download(from sourceString: String, to destinationString: String) {
    let source = URL(string: sourceString)!
    let destination = URL(fileURLWithPath: destinationString)
    let data = try! Data(contentsOf: source)
    try! data.write(to: destination)
}
let trainDataFilename = "iris_training.csv"
download(from: "http://download.tensorflow.org/data/iris_training.csv", to: trainDataFilename)

// Preview the first five lines of the CSV file.
let f = Python.open(trainDataFilename)
for _ in 0..<5 {
    print(Python.next(f).strip())
}
f.close()
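// The first preview line is the file's header row; in this dataset it encodes
// the example count, feature count, and class names rather than column names.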
let featureNames = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
let labelName = "species"
let columnNames = featureNames + [labelName]
// print("Features: \(featureNames)")
// print("Label: \(labelName)")
let classNames = ["Iris setosa", "Iris versicolor", "Iris virginica"]
let batchSize = 32
/// A batch of examples from the iris dataset.
struct IrisBatch {
    /// [batchSize, featureCount] tensor of features.
    let features: Tensor<Float>
    /// [batchSize] tensor of labels.
    let labels: Tensor<Int32>
}

/// Conform `IrisBatch` to `Collatable` so that batches can be formed by `TrainingEpochs`.
extension IrisBatch: Collatable {
    public init<BatchSamples: Collection>(collating samples: BatchSamples)
        where BatchSamples.Element == Self {
        // `IrisBatch`es are collated by stacking their feature and label
        // tensors along the batch axis to produce a single tensor of each.
        features = Tensor<Float>(stacking: samples.map { $0.features })
        labels = Tensor<Int32>(stacking: samples.map { $0.labels })
    }
}
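// A quick sanity check (not in the original walkthrough): collating two
// single-example batches stacks them along a new leading batch axis.
let sampleA = IrisBatch(features: [5.1, 3.5, 1.4, 0.2], labels: Tensor(0))
let sampleB = IrisBatch(features: [7.0, 3.2, 4.7, 1.4], labels: Tensor(1))
let collatedPair = IrisBatch(collating: [sampleA, sampleB])
print(collatedPair.features.shape, collatedPair.labels.shape)  // [2, 4] [2]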
/// Loads the iris dataset from a CSV file as an array of single-example `IrisBatch`es.
func loadIrisDatasetFromCSV(
    contentsOf: String, hasHeader: Bool, featureColumns: [Int], labelColumns: [Int]
) -> [IrisBatch] {
    let np = Python.import("numpy")

    let featuresNp = np.loadtxt(
        contentsOf,
        delimiter: ",",
        skiprows: hasHeader ? 1 : 0,
        usecols: featureColumns,
        dtype: Float.numpyScalarTypes.first!)
    guard let featuresTensor = Tensor<Float>(numpy: featuresNp) else {
        // This should never happen, because we construct featuresNp in such a
        // way that it should be convertible to tensor.
        fatalError("np.loadtxt result can't be converted to Tensor")
    }

    let labelsNp = np.loadtxt(
        contentsOf,
        delimiter: ",",
        skiprows: hasHeader ? 1 : 0,
        usecols: labelColumns,
        dtype: Int32.numpyScalarTypes.first!)
    guard let labelsTensor = Tensor<Int32>(numpy: labelsNp) else {
        // This should never happen, because we construct labelsNp in such a
        // way that it should be convertible to tensor.
        fatalError("np.loadtxt result can't be converted to Tensor")
    }

    return zip(featuresTensor.unstacked(), labelsTensor.unstacked()).map {
        IrisBatch(features: $0.0, labels: $0.1)
    }
}
let trainingDataset: [IrisBatch] = loadIrisDatasetFromCSV(
    contentsOf: trainDataFilename,
    hasHeader: true,
    featureColumns: [0, 1, 2, 3],
    labelColumns: [4])

let trainingEpochs = TrainingEpochs(samples: trainingDataset, batchSize: batchSize)

let firstTrainEpoch = trainingEpochs.next()!
let firstTrainBatch = firstTrainEpoch.first!.collated
let firstTrainFeatures = firstTrainBatch.features
let firstTrainLabels = firstTrainBatch.labels
// print("First batch of features: \(firstTrainFeatures)")
// print("firstTrainFeatures.shape: \(firstTrainFeatures.shape)")
// print("First batch of labels: \(firstTrainLabels)")
// print("firstTrainLabels.shape: \(firstTrainLabels.shape)")

let firstTrainFeaturesTransposed = firstTrainFeatures.transposed()
let petalLengths = firstTrainFeaturesTransposed[2].scalars
let sepalLengths = firstTrainFeaturesTransposed[0].scalars
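// In the original Colab notebook these two columns feed a matplotlib scatter
// plot of petal length vs. sepal length; this script only extracts them.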
let hiddenSize: Int = 10
struct IrisModel: Layer {
    var layer1 = Dense<Float>(inputSize: 4, outputSize: hiddenSize, activation: relu)
    var layer2 = Dense<Float>(inputSize: hiddenSize, outputSize: hiddenSize, activation: relu)
    var layer3 = Dense<Float>(inputSize: hiddenSize, outputSize: 3)

    @differentiable(reverse)
    func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        return input.sequenced(through: layer1, layer2, layer3)
    }
}
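// Shape flow through the model: [batchSize, 4] features -> Dense(4→10, relu)
// -> Dense(10→10, relu) -> Dense(10→3), one unnormalized logit per class.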
var model = IrisModel()

// Apply the model to a batch of features.
let firstTrainPredictions = model(firstTrainFeatures)
print(firstTrainPredictions[0..<5])
print(softmax(firstTrainPredictions[0..<5]))
print("Prediction: \(firstTrainPredictions.argmax(squeezingAxis: 1))")
print("    Labels: \(firstTrainLabels)")

let untrainedLogits = model(firstTrainFeatures)
let untrainedLoss = softmaxCrossEntropy(logits: untrainedLogits, labels: firstTrainLabels)
print("Loss test: \(untrainedLoss)")
let optimizer = SGD(for: model, learningRate: 0.01)

let (loss, grads) = valueWithGradient(at: model) { model -> Tensor<Float> in
    let logits = model(firstTrainFeatures)
    return softmaxCrossEntropy(logits: logits, labels: firstTrainLabels)
}
print("Current loss: \(loss)")

optimizer.update(&model, along: grads)
let logitsAfterOneStep = model(firstTrainFeatures)
let lossAfterOneStep = softmaxCrossEntropy(logits: logitsAfterOneStep, labels: firstTrainLabels)
print("Next loss: \(lossAfterOneStep)")
let epochCount = 500
var trainAccuracyResults: [Float] = []
var trainLossResults: [Float] = []

/// Returns the fraction of `predictions` that match `truths`.
func accuracy(predictions: Tensor<Int32>, truths: Tensor<Int32>) -> Float {
    return Tensor<Float>(predictions .== truths).mean().scalarized()
}
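// For example, predictions [0, 1, 2] against truths [0, 1, 1] match in two of
// three positions, so the function returns 2/3 ≈ 0.667.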
for (epochIndex, epoch) in trainingEpochs.prefix(epochCount).enumerated() {
    var epochLoss: Float = 0
    var epochAccuracy: Float = 0
    var batchCount: Int = 0
    for batchSamples in epoch {
        let batch = batchSamples.collated
        let (loss, grad) = valueWithGradient(at: model) { (model: IrisModel) -> Tensor<Float> in
            let logits = model(batch.features)
            return softmaxCrossEntropy(logits: logits, labels: batch.labels)
        }
        optimizer.update(&model, along: grad)

        let logits = model(batch.features)
        epochAccuracy += accuracy(predictions: logits.argmax(squeezingAxis: 1), truths: batch.labels)
        epochLoss += loss.scalarized()
        batchCount += 1
    }
    epochAccuracy /= Float(batchCount)
    epochLoss /= Float(batchCount)
    trainAccuracyResults.append(epochAccuracy)
    trainLossResults.append(epochLoss)

    if epochIndex % 50 == 0 {
        print("Epoch \(epochIndex): Loss: \(epochLoss), Accuracy: \(epochAccuracy)")
    }
}
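// In the original Colab, `trainAccuracyResults` and `trainLossResults` are
// charted with matplotlib; they are unused in this script but accumulated
// anyway so the training curves can still be plotted if desired.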
let testDataFilename = "iris_test.csv"
download(from: "http://download.tensorflow.org/data/iris_test.csv", to: testDataFilename)

let testDataset = loadIrisDatasetFromCSV(
    contentsOf: testDataFilename, hasHeader: true,
    featureColumns: [0, 1, 2, 3], labelColumns: [4]).inBatches(of: batchSize)

// NOTE: Only a single batch will run in the loop, since the batch size we're
// using is larger than the test set size.
for batchSamples in testDataset {
    let batch = batchSamples.collated
    let logits = model(batch.features)
    let predictions = logits.argmax(squeezingAxis: 1)
    print("Test batch accuracy: \(accuracy(predictions: predictions, truths: batch.labels))")
}

let firstTestBatch = testDataset.first!.collated
let firstTestBatchLogits = model(firstTestBatch.features)
let firstTestBatchPredictions = firstTestBatchLogits.argmax(squeezingAxis: 1)
print(firstTestBatchPredictions)
print(firstTestBatch.labels)
let unlabeledDataset: Tensor<Float> =
    [[5.1, 3.3, 1.7, 0.5],
     [5.9, 3.0, 4.2, 1.5],
     [6.9, 3.1, 5.4, 2.1]]

let unlabeledDatasetPredictions = model(unlabeledDataset)

for i in 0..<unlabeledDatasetPredictions.shape[0] {
    let logits = unlabeledDatasetPredictions[i]
    let classIdx = logits.argmax().scalar!
    print("Example \(i) prediction: \(classNames[Int(classIdx)]) (\(softmax(logits)))")
}
////////// raw_tensorFlow_operators.ipynb | |
// ======================================================== | |
// | |
// I don't know if this one works; I commented it out for some reason. | |
// | |
import TensorFlow | |
print(_Raw.mul(Tensor([2.0, 3.0]), Tensor([5.0, 6.0]))) | |
infix operator .* : MultiplicationPrecedence

extension Tensor where Scalar: Numeric {
    @differentiable(reverse where Scalar: TensorFlowFloatingPoint)
    static func .* (_ lhs: Tensor, _ rhs: Tensor) -> Tensor {
        return _Raw.mul(lhs, rhs)
    }
}
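// Raw ops don't carry derivatives of their own, so differentiating `.*`
// requires the custom pullback registered below. The `unbroadcasted(to:)`
// calls fold gradients back to the operands' shapes if `mul` broadcast them.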
extension Tensor where Scalar: TensorFlowFloatingPoint {
    @derivative(of: .*)
    static func multiplyDerivative(
        _ lhs: Tensor, _ rhs: Tensor
    ) -> (value: Tensor, pullback: (Tensor) -> (Tensor, Tensor)) {
        return (lhs * rhs, { v in
            ((rhs * v).unbroadcasted(to: lhs.shape),
             (lhs * v).unbroadcasted(to: rhs.shape))
        })
    }
}
let x: Tensor<Double> = [[1.0, 2.0], [3.0, 4.0]]
let y: Tensor<Double> = [[8.0, 7.0], [6.0, 5.0]]
print(x .* y)
print(gradient(at: x, y) { x, y in
    (x .* y).sum()
})
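// Sanity check: d/dx sum(x .* y) = y and d/dy sum(x .* y) = x, so the printed
// gradients should equal y and x respectively.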
// This isn't a separate tutorial, but run it as its own script anyway.
// ======================================================== | |
import TensorFlow | |
let matrix = Tensor<Float>([[1, 2], [3, 4]]) | |
print(_Raw.matMul(matrix, matrix, transposeA: true, transposeB: true)) | |
print(_Raw.matMul(matrix, matrix, transposeA: true, transposeB: false)) | |
print(_Raw.matMul(matrix, matrix, transposeA: false, transposeB: true)) | |
print(_Raw.matMul(matrix, matrix, transposeA: false, transposeB: false))
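// Expected results for A = [[1, 2], [3, 4]], verifiable by hand:
//   AᵀAᵀ = [[ 7, 15], [10, 22]]
//   AᵀA  = [[10, 14], [14, 20]]
//   AAᵀ  = [[ 5, 11], [11, 25]]
//   AA   = [[ 7, 10], [15, 22]]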