Skip to content

Instantly share code, notes, and snippets.

@noppoMan

noppoMan/Shaders.metal

Last active May 30, 2020
Embed
What would you like to do?
Sample code for computing the sigmoid function with Metal
// Sample input: the integers 0 through 10000, each converted to a Float.
let input = (0...10000).map { Float($0) }
// Run the sigmoid over every element on the GPU and print the resulting array.
print(try sigmoid_on_gpu(input))
// This file is written in the Metal Shading Language (MSL).
// For more details, see the specification: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
#include <metal_stdlib>
using namespace metal;
// Element-wise logistic sigmoid: outVector[id] = 1 / (1 + e^(-inVector[id])).
//
// inVector  - read-only input values, bound at buffer index 0.
// outVector - destination for the results, bound at buffer index 1.
// id        - this thread's position in the dispatch grid; selects the single
//             element the thread processes.
//
// NOTE: `id` is not bounds-checked, so both buffers must be large enough for
// every thread that is dispatched.
kernel void sigmoid(const device float *inVector [[ buffer(0) ]],
                    device float *outVector [[ buffer(1) ]],
                    uint id [[ thread_position_in_grid ]]) {
    // Each invocation reads one element and writes one result.
    const float x = inVector[id];
    outVector[id] = 1.0 / (1.0 + exp(-x));
}
import Metal
import MetalPerformanceShaders
/// The system's default Metal device (GPU).
/// Nothing in this sample can run without one, so startup stops with a
/// descriptive message instead of an anonymous force-unwrap crash.
let device: MTLDevice = {
    guard let systemDevice = MTLCreateSystemDefaultDevice() else {
        fatalError("Metal is not available: MTLCreateSystemDefaultDevice() returned nil")
    }
    return systemDevice
}()

/// Command queue on `device` used to submit the compute work.
let commandQueue: MTLCommandQueue = {
    guard let queue = device.makeCommandQueue() else {
        fatalError("Could not create a Metal command queue")
    }
    return queue
}()

/// The app's default Metal library, which contains the compiled Shaders.metal.
let defaultLibrary: MTLLibrary = {
    guard let library = device.makeDefaultLibrary() else {
        fatalError("Could not load the default Metal library (is Shaders.metal part of the target?)")
    }
    return library
}()
// Reference: https://memkite.com/blog/2014/12/15/data-parallel-programming-with-metal-and-swift-for-iphoneipad-gpu/index.html
/// Failures that can occur while preparing GPU work in `sigmoid_on_gpu(_:)`.
enum SigmoidGPUError: Error {
    /// The default library contains no kernel function with this name.
    case kernelNotFound(name: String)
    /// Metal could not allocate a buffer of the requested byte length.
    case bufferAllocationFailed(byteLength: Int)
    /// The command queue could not provide a command buffer.
    case commandBufferUnavailable
    /// The command buffer could not provide a compute command encoder.
    case commandEncoderUnavailable
}

/// Computes the logistic sigmoid `1 / (1 + exp(-x))` of every element of
/// `input` on the GPU using the `sigmoid` kernel from the default library.
///
/// - Parameter input: The values to transform. May be empty.
/// - Returns: An array of the same length as `input` holding the sigmoid of
///   each element, in order.
/// - Throws: `SigmoidGPUError` when a Metal object cannot be created, any error
///   thrown by `makeComputePipelineState(function:)`, or the command buffer's
///   error if execution on the GPU failed.
func sigmoid_on_gpu(_ input: [Float]) throws -> [Float] {
    // Metal rejects zero-length buffers and empty dispatches, so there is
    // nothing to send to the GPU for an empty input.
    guard !input.isEmpty else { return [] }

    // Do all fallible setup before opening an encoder, so that a thrown error
    // never abandons an encoder without `endEncoding()`.
    let kernelName = "sigmoid"
    guard let sigmoidFunction = defaultLibrary.makeFunction(name: kernelName) else {
        throw SigmoidGPUError.kernelNotFound(name: kernelName)
    }
    let pipelineState = try device.makeComputePipelineState(function: sigmoidFunction)

    // The grid is dispatched in whole threadgroups, so up to
    // `threadgroupWidth - 1` extra threads run past the end of the data.
    // The kernel has no bounds check; pad both buffers to the dispatched
    // thread count so those threads touch only padding.
    let threadgroupWidth = 32
    let threadgroupCount = (input.count + threadgroupWidth - 1) / threadgroupWidth
    let elementSize = MemoryLayout<Float>.stride
    let inputByteLength = input.count * elementSize
    let paddedByteLength = threadgroupCount * threadgroupWidth * elementSize

    guard let inVectorBuffer = device.makeBuffer(length: paddedByteLength, options: []),
          let outVectorBuffer = device.makeBuffer(length: paddedByteLength, options: []) else {
        throw SigmoidGPUError.bufferAllocationFailed(byteLength: paddedByteLength)
    }
    // Only the first `input.count` elements carry data; results computed from
    // the padding are never read back.
    inVectorBuffer.contents().copyMemory(from: input, byteCount: inputByteLength)

    guard let commandBuffer = commandQueue.makeCommandBuffer() else {
        throw SigmoidGPUError.commandBufferUnavailable
    }
    guard let computeCommandEncoder = commandBuffer.makeComputeCommandEncoder() else {
        throw SigmoidGPUError.commandEncoderUnavailable
    }
    computeCommandEncoder.setComputePipelineState(pipelineState)
    computeCommandEncoder.setBuffer(inVectorBuffer, offset: 0, index: 0)
    computeCommandEncoder.setBuffer(outVectorBuffer, offset: 0, index: 1)
    computeCommandEncoder.dispatchThreadgroups(
        MTLSize(width: threadgroupCount, height: 1, depth: 1),
        threadsPerThreadgroup: MTLSize(width: threadgroupWidth, height: 1, depth: 1)
    )
    computeCommandEncoder.endEncoding()

    // Submit the work and block until the GPU has finished.
    commandBuffer.commit()
    commandBuffer.waitUntilCompleted()
    if let executionError = commandBuffer.error {
        throw executionError
    }

    // Copy just the meaningful prefix of the output buffer into a Swift array.
    let results = outVectorBuffer.contents().bindMemory(to: Float.self, capacity: input.count)
    return Array(UnsafeBufferPointer(start: results, count: input.count))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment