Sample code for computing the sigmoid function with Metal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build the sample input: the integers 0 through 10000 converted to Float.
let input: [Float] = (0...10000).map { Float($0) }

// Evaluate the sigmoid of every element on the GPU and print the result.
let sigmoidValues = try sigmoid_on_gpu(input)
print(sigmoidValues)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Metal | |
import MetalPerformanceShaders | |
/// The system default Metal device used for all GPU work in this file.
/// Traps with a descriptive message when no Metal device is available.
let device: MTLDevice = {
    guard let systemDevice = MTLCreateSystemDefaultDevice() else {
        fatalError("Metal is not available: MTLCreateSystemDefaultDevice() returned nil")
    }
    return systemDevice
}()

/// Command queue on `device` from which command buffers are created.
let commandQueue: MTLCommandQueue = {
    guard let queue = device.makeCommandQueue() else {
        fatalError("Unable to create a Metal command queue")
    }
    return queue
}()

/// The default shader library of `device` (loads Shaders.metal).
let defaultLibrary: MTLLibrary = {
    guard let library = device.makeDefaultLibrary() else {
        fatalError("Unable to load the default Metal library (is Shaders.metal part of the target?)")
    }
    return library
}()
// Reference: https://memkite.com/blog/2014/12/15/data-parallel-programming-with-metal-and-swift-for-iphoneipad-gpu/index.html
/// Errors thrown by `sigmoid_on_gpu(_:)` when a required Metal object cannot be created.
enum SigmoidGPUError: Error {
    /// No function named "sigmoid" was found in `defaultLibrary`.
    case kernelNotFound
    /// `device.makeBuffer` returned nil for the input or output buffer.
    case bufferAllocationFailed
    /// `commandQueue.makeCommandBuffer()` returned nil.
    case commandBufferUnavailable
    /// `commandBuffer.makeComputeCommandEncoder()` returned nil.
    case commandEncoderUnavailable
}

/// Runs the "sigmoid" compute kernel from `defaultLibrary` over `input` on the GPU.
///
/// The input is bound at buffer index 0 and the output at buffer index 1, and the
/// kernel is dispatched in 1-D threadgroups of 32 threads. The call blocks until
/// the GPU has finished.
///
/// - Parameter input: Values to process. An empty array yields an empty result
///   without touching the GPU.
/// - Returns: One output value per input element, in the same order.
/// - Throws: `SigmoidGPUError` when a Metal object cannot be created, the error
///   from `makeComputePipelineState(function:)`, or the command buffer's error
///   if execution failed.
func sigmoid_on_gpu(_ input: [Float]) throws -> [Float] {
    // Zero-length buffers cannot be allocated and a zero-width dispatch is invalid.
    guard !input.isEmpty else { return [] }

    // Build the pipeline before any encoder exists, so that a throw here cannot
    // leave a command encoder without a matching endEncoding().
    guard let sigmoidFunction = defaultLibrary.makeFunction(name: "sigmoid") else {
        throw SigmoidGPUError.kernelNotFound
    }
    let pipelineState = try device.makeComputePipelineState(function: sigmoidFunction)

    let threadsPerGroupWidth = 32
    let groupCount = (input.count + threadsPerGroupWidth - 1) / threadsPerGroupWidth

    // The dispatch launches groupCount * 32 threads, which can exceed input.count.
    // The kernel source is not visible here, so both buffers are padded to the
    // dispatched size; every thread then indexes valid memory even if the kernel
    // does not bounds-check its thread id.
    let paddedCount = groupCount * threadsPerGroupWidth
    let paddedByteLength = paddedCount * MemoryLayout<Float>.stride
    let paddedInput = input + [Float](repeating: 0, count: paddedCount - input.count)

    guard let inVectorBuffer = device.makeBuffer(bytes: paddedInput, length: paddedByteLength, options: []),
          let outVectorBuffer = device.makeBuffer(length: paddedByteLength, options: []) else {
        throw SigmoidGPUError.bufferAllocationFailed
    }

    guard let commandBuffer = commandQueue.makeCommandBuffer() else {
        throw SigmoidGPUError.commandBufferUnavailable
    }
    guard let computeCommandEncoder = commandBuffer.makeComputeCommandEncoder() else {
        throw SigmoidGPUError.commandEncoderUnavailable
    }

    computeCommandEncoder.setComputePipelineState(pipelineState)
    computeCommandEncoder.setBuffer(inVectorBuffer, offset: 0, index: 0)
    computeCommandEncoder.setBuffer(outVectorBuffer, offset: 0, index: 1)

    let threadsPerGroup = MTLSize(width: threadsPerGroupWidth, height: 1, depth: 1)
    let numThreadgroups = MTLSize(width: groupCount, height: 1, depth: 1)
    computeCommandEncoder.dispatchThreadgroups(numThreadgroups, threadsPerThreadgroup: threadsPerGroup)
    computeCommandEncoder.endEncoding()

    commandBuffer.commit()
    commandBuffer.waitUntilCompleted()

    // Surface GPU-side failures instead of returning whatever is in the buffer.
    if let executionError = commandBuffer.error {
        throw executionError
    }

    // Copy only the first input.count results; the padding elements are discarded.
    let results = outVectorBuffer.contents().bindMemory(to: Float.self, capacity: paddedCount)
    return Array(UnsafeBufferPointer(start: results, count: input.count))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment