Skip to content

Instantly share code, notes, and snippets.

@prufrock
Last active June 5, 2020 18:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prufrock/f1f3e5ba6e28eea817d92fa82d3d7452 to your computer and use it in GitHub Desktop.
Save prufrock/f1f3e5ba6e28eea817d92fa82d3d7452 to your computer and use it in GitHub Desktop.
I converted this Apple Metal tutorial, https://developer.apple.com/documentation/metal/basic_tasks_and_concepts/performing_calculations_on_a_gpu?preferredLanguage=occ, into Swift with some tweaks. This should technically be "MetalAdder.playground", but GitHub doesn't seem to know what to make of the ".playground" extension. If you want to try it o…
import PlaygroundSupport
import MetalKit
// Apple's sample uses the length below, but building two buffers of that length takes too long here:
// let arrayLength: Int = 1 << 24
// Number of Float elements held by each buffer.
let arrayLength = 1 << 20
// Size in bytes of one buffer holding `arrayLength` Floats.
let bufferSize = MemoryLayout<Float>.stride * arrayLength
/// Adds two arrays of `Float` on the GPU with a Metal compute kernel and checks the
/// output against the same sums computed on the CPU.
///
/// Typical use: create an instance with a device, call `prepareData()`, then
/// `sendComputeCommand()`. The element count and byte size of every buffer come from the
/// file-level `arrayLength` and `bufferSize` constants.
class MetalAdder {
    var device: MTLDevice

    // The compute pipeline built in `init` from the `add_arrays` kernel source.
    var addFunctionPSO: MTLComputePipelineState

    // The command queue used to pass commands to the device.
    var commandQueue: MTLCommandQueue

    // Input and output buffers; nil until `prepareData()` has run.
    var mBufferA: MTLBuffer?
    var mBufferB: MTLBuffer?
    var mBufferResult: MTLBuffer?

    /// Compiles the embedded kernel source and creates the pipeline state and command queue.
    ///
    /// Traps with `fatalError` if the kernel function or the command queue cannot be created.
    ///
    /// - Parameter withDevice: The Metal device used to compile the kernel and run the work.
    /// - Throws: Any error raised while compiling the shader source or while creating the
    ///   compute pipeline state.
    init(withDevice: MTLDevice) throws {
        device = withDevice
        let shader = """
        #include <metal_stdlib>
        using namespace metal;
        /// This is a Metal Shading Language (MSL) function equivalent to the add_arrays() C function, used to perform the calculation on a GPU.
        kernel void add_arrays(device const float* inA,
        device const float* inB,
        device float* result,
        uint index [[thread_position_in_grid]])
        {
        // the for-loop is replaced with a collection of threads, each of which
        // calls this function.
        result[index] = inA[index] + inB[index];
        }
        """
        let library = try withDevice.makeLibrary(source: shader, options: nil)
        guard let addFunction = library.makeFunction(name: "add_arrays") else {
            fatalError("Could not create the adder function")
        }
        // Create a compute pipeline state object.
        addFunctionPSO = try withDevice.makeComputePipelineState(function: addFunction)
        guard let queue = withDevice.makeCommandQueue() else {
            fatalError("Could not create command queue")
        }
        commandQueue = queue
    }

    /// Allocates the two input buffers and the result buffer, then fills both inputs with
    /// random values.
    ///
    /// Traps with `fatalError` if any of the three buffers cannot be allocated.
    func prepareData() {
        guard
            let bufferA = device.makeBuffer(length: bufferSize, options: .storageModeShared),
            let bufferB = device.makeBuffer(length: bufferSize, options: .storageModeShared),
            let bufferResult = device.makeBuffer(length: bufferSize, options: .storageModeShared)
        else {
            fatalError("Could not allocate the data buffers")
        }
        mBufferA = bufferA
        mBufferB = bufferB
        mBufferResult = bufferResult
        generateRandomFloatData(buffer: bufferA)
        generateRandomFloatData(buffer: bufferB)
    }

    /// Encodes the add kernel, submits it, blocks until the GPU finishes, prints the elapsed
    /// time, and then verifies the output on the CPU.
    func sendComputeCommand() {
        // Create a command buffer to hold commands.
        guard let commandBuffer = commandQueue.makeCommandBuffer() else {
            fatalError("Could not create command buffer")
        }
        // Start a compute pass.
        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
            fatalError("Could not create compute encoder")
        }
        // Encodes the dispatch and ends the compute pass.
        encodeAddCommand(computeEncoder: computeEncoder)
        // Take the timestamp before committing so work that begins right after submission
        // is included in the measurement.
        let startTime = CFAbsoluteTimeGetCurrent()
        commandBuffer.commit()
        // Block until the calculation is complete.
        commandBuffer.waitUntilCompleted()
        let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
        print("Time elapsed for GPU calculation: \(timeElapsed) s.")
        verifyResults()
    }

    /// Binds the pipeline state and the three buffers, dispatches one thread per array
    /// element, and ends encoding on the given encoder.
    ///
    /// - Parameter computeEncoder: The encoder that receives the commands. It is ended
    ///   before this method returns, so it cannot be reused afterwards.
    func encodeAddCommand(computeEncoder: MTLComputeCommandEncoder) {
        // Encode the pipeline state object and its parameters. The indices match the
        // order of the kernel's buffer arguments: inA, inB, result.
        computeEncoder.setComputePipelineState(addFunctionPSO)
        computeEncoder.setBuffer(mBufferA, offset: 0, index: 0)
        computeEncoder.setBuffer(mBufferB, offset: 0, index: 1)
        computeEncoder.setBuffer(mBufferResult, offset: 0, index: 2)

        let gridSize = MTLSizeMake(arrayLength, 1, 1)
        // A threadgroup never needs more threads than there are elements.
        let threadsPerGroup = min(addFunctionPSO.maxTotalThreadsPerThreadgroup, arrayLength)
        let threadgroupSize = MTLSizeMake(threadsPerGroup, 1, 1)

        // Encode the compute command.
        computeEncoder.dispatchThreads(gridSize, threadsPerThreadgroup: threadgroupSize)
        computeEncoder.endEncoding()
    }

    /// Fills the buffer with random `Float` values in the range 0...1.
    ///
    /// Writes at most `arrayLength` elements and never more than the buffer can hold.
    ///
    /// - Parameter buffer: The buffer to fill; its contents are written in place as `Float`s.
    func generateRandomFloatData(buffer: MTLBuffer) {
        let capacity = buffer.length / MemoryLayout<Float>.stride
        let count = min(arrayLength, capacity)
        let dataPtr = buffer.contents().assumingMemoryBound(to: Float.self)
        // Half-open range: the last valid index is count - 1.
        for index in 0..<count {
            dataPtr[index] = Float.random(in: 0...1)
        }
    }

    /// Recomputes every sum on the CPU and compares it with the GPU result.
    ///
    /// On the first mismatch it prints the offending index, triggers an assertion failure,
    /// and returns without reporting success. Traps with `fatalError` if the buffers have
    /// not been created yet.
    func verifyResults() {
        guard let bufferA = mBufferA, let bufferB = mBufferB, let bufferResult = mBufferResult else {
            fatalError("prepareData() must be called before verifyResults()")
        }
        let a = bufferA.contents().assumingMemoryBound(to: Float.self)
        let b = bufferB.contents().assumingMemoryBound(to: Float.self)
        let result = bufferResult.contents().assumingMemoryBound(to: Float.self)

        let startTime = CFAbsoluteTimeGetCurrent()
        for index in 0..<arrayLength {
            let total = a[index] + b[index]
            if result[index] != total {
                print("Compute error index=\(index) result=\(result[index]) \(total)=a+b")
                assertionFailure("GPU result does not match CPU result at index \(index)")
                // Assertions are compiled out of optimized builds; stop here so a failed
                // run is never followed by the success message.
                return
            }
        }
        let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
        print("Time elapsed for CPU calculation: \(timeElapsed) s.")
        print("Compute results as expected")
    }
}
// Playground entry point: pick the system default GPU, then run one full add-and-verify pass.
let device: MTLDevice = {
    guard let defaultDevice = MTLCreateSystemDefaultDevice() else {
        fatalError("GPU is not supported")
    }
    return defaultDevice
}()
let adder = try MetalAdder(withDevice: device)
adder.prepareData()
adder.sendComputeCommand()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment