Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JacopoMangiavacchi/500e4d328231263efd1b425483264164 to your computer and use it in GitHub Desktop.
Save JacopoMangiavacchi/500e4d328231263efd1b425483264164 to your computer and use it in GitHub Desktop.
/// Loads a bundled CSV data set and flattens it into feature and label buffers.
///
/// Every usable line holds an integer label followed by at least `imageSize` integer
/// pixel values, separated by commas. Lines are dealt round-robin into a fixed number
/// of buckets which are then parsed in parallel, so the order of samples in the result
/// is not guaranteed to match the file order. Features and labels of one sample are
/// appended together inside a serial section, so the two buffers stay aligned.
///
/// Lines that do not yield a label plus `imageSize` integers (for example a header row
/// or a blank trailing line) are skipped instead of trapping on an out-of-range index.
///
/// - Parameters:
///   - fileName: Name of the `csv` resource in the main bundle, without extension.
///     The process is terminated with `fatalError` if the resource is missing.
///   - updateStatus: Called with the running number of appended samples after each
///     sample. It is invoked synchronously inside the serial section, on whichever
///     thread is parsing, so callers must hop to the main queue themselves if needed.
/// - Returns: `(X, Y)` where `X` holds the pixel values divided by 255 and `Y` holds the
///   concatenated `oneHotEncoding` output of each label.
private func readDataSet(fileName: String, updateStatus: @escaping (Int) -> Void) -> ([Float], [Float]) {
    guard let filePath = Bundle.main.path(forResource: fileName, ofType: "csv") else {
        fatalError("CSV file not found")
    }

    let serialQueue = DispatchQueue(label: "MNIST.serial.queue.\(fileName)")
    let imageSize = self.imageSize
    let iterations = 20

    // Deal the lines round-robin into `iterations` buckets so each parallel worker
    // gets a similar share of the file.
    var iteration = 0
    var iterationList = [[String]](repeating: [], count: iterations)
    getFileLine(filePath: filePath) { line in
        iterationList[iteration].append(line)
        iteration = (iteration + 1) % iterations
    }

    // Freeze the buckets so the concurrent workers only read an immutable value.
    let buckets = iterationList
    let lineCount = buckets.reduce(0) { $0 + $1.count }

    var count = 0
    var X = [Float]()
    var Y = [Float]()
    // Upper bound on the final size (skipped lines make it an over-estimate);
    // avoids repeated reallocation while appending.
    X.reserveCapacity(lineCount * imageSize)

    DispatchQueue.concurrentPerform(iterations: iterations) { bucketIndex in
        for line in buckets[bucketIndex] {
            // Trim once per line: a trailing "\r" (CRLF files) or stray whitespace would
            // make the last field fail `Int` parsing and be dropped by `compactMap`.
            let sample = line
                .trimmingCharacters(in: .whitespacesAndNewlines)
                .split(separator: ",")
                .compactMap { Int($0) }

            // Need the label plus `imageSize` pixels; otherwise the subscripts below trap.
            guard sample.count > imageSize else { continue }

            // Normalise outside the serial section so this work actually runs in parallel.
            let pixels = sample[1...imageSize].map { Float($0) / Float(255.0) }

            serialQueue.sync {
                // `oneHotEncoding` stays inside the serial section as in the original;
                // its thread-safety is not visible from here.
                Y.append(contentsOf: oneHotEncoding(sample[0]))
                X.append(contentsOf: pixels)
                count += 1
                updateStatus(count)
            }
        }
    }
    return (X, Y)
}
/// Starts loading the training and test CSV data sets asynchronously.
///
/// Resets both sample counters, raises `dataPreparing`, and dispatches one load per
/// data set onto `concurrentQueue`. Progress counts and the final buffers are written
/// from blocks dispatched to the main queue; `dataPreparing` is cleared by whichever
/// data set finishes last.
public func asyncPrepareData() {
    trainingBatchCount = 0
    testBatchCount = 0
    dataPreparing = true

    // Completion flags. They are only read and written from main-queue blocks below.
    var isTrainingSetReady = false
    var isTestSetReady = false

    // Training set.
    concurrentQueue.async {
        let (pixels, labels) = self.readDataSet(fileName: "mnist_train", updateStatus: { loaded in
            DispatchQueue.main.async { self.trainingBatchCount = loaded }
        })

        DispatchQueue.main.async {
            // Final count derived from the loaded buffer, then publish the data.
            self.trainingBatchCount = pixels.count / self.imageSize
            self.trainingDataX = pixels
            self.trainingDataY = labels
            isTrainingSetReady = true

            guard isTestSetReady else { return }
            self.dataPreparing = false
        }
    }

    // Test set.
    concurrentQueue.async {
        let (pixels, labels) = self.readDataSet(fileName: "mnist_test", updateStatus: { loaded in
            DispatchQueue.main.async { self.testBatchCount = loaded }
        })

        DispatchQueue.main.async {
            // Final count derived from the loaded buffer, then publish the data.
            self.testBatchCount = pixels.count / self.imageSize
            self.testDataX = pixels
            self.testDataY = labels
            isTestSetReady = true

            guard isTrainingSetReady else { return }
            self.dataPreparing = false
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment