Skip to content

Instantly share code, notes, and snippets.

@lukaszmargielewski
Last active March 11, 2021 15:04
Show Gist options
  • Save lukaszmargielewski/0a3b16d4661dd7d7e00d to your computer and use it in GitHub Desktop.
Save lukaszmargielewski/0a3b16d4661dd7d7e00d to your computer and use it in GitHub Desktop.
iOS Metal client side code for compute pipeline
// For stackoverflow question:
// http://stackoverflow.com/questions/30445801/ios8-metal-compute-pipeline-slower-than-cpu
// I. ONE-TIME SETUP (invoked once, at the init stage)

// Character table read by the kernel. newBufferWithBytes: copies the source bytes into a
// newly allocated Metal buffer; options:0 selects the default resource options.
_mtlCharTable = [_mtlDevice newBufferWithBytes:_charTable->pointer
                                        length:_charTable->bytesTotal
                                       options:0];

// Search-mask buffer that wraps _searchIndexes.mask in place (no copy). With a nil deallocator
// Metal never frees the memory, so the allocation has to outlive the buffer.
// NOTE(review): Metal documents that no-copy storage must be page-aligned and its length a
// multiple of the page size — confirm how _searchIndexes.mask is allocated.
_mtlSearchMasks = [_mtlDevice newBufferWithBytesNoCopy:_searchIndexes.mask
                                                length:_searchIndexes.bytesTotalMask
                                               options:0
                                           deallocator:nil];

// iPhone 6: maxTotalThreadsPerThreadgroup: 512, threadExecutionWidth: 32
// Use the largest threadgroup the pipeline state reports, laid out along one dimension.
// Kept as NSUInteger (the API's own type) instead of narrowing to a 32-bit uint.
const NSUInteger threadsPerThreadgroup = _mtlComputePipelineState.maxTotalThreadsPerThreadgroup;
m_ThreadgroupSize = MTLSizeMake(threadsPerThreadgroup, 1, 1);

// Number of threadgroups (not threads): rowCount divided by the threadgroup width, rounded up,
// so the 1-D grid holds at least rowCount threads.
// NOTE(review): the last threadgroup can extend past rowCount — presumably the kernel
// bounds-checks its thread index against rowCount; verify in the shader.
const NSUInteger threadgroupCount =
    (_charTable->rowCount + threadsPerThreadgroup - 1) / threadsPerThreadgroup;
m_ThreadgroupCount = MTLSizeMake(threadgroupCount, 1, 1);
///////////////////////////////////////////////////////////////////
// II. SEARCH (invoked every time the search phrase changes)

// Wait until the in-flight semaphore is available before encoding another search.
dispatch_semaphore_wait(m_InflightSemaphore, DISPATCH_TIME_FOREVER);
// Local alias so the completion handler signals exactly the semaphore waited on above.
dispatch_semaphore_t inflightSemaphore = m_InflightSemaphore;

// Start of the timed region (mach ticks); read again after encoding and on completion.
uint64_t ts = mach_absolute_time();

// The kernel receives the UTF-8 bytes of the lower-cased phrase, so the length must be the
// byte count of that same string. NSString.length counts UTF-16 code units of the original
// phrase and disagrees with the byte count for non-ASCII input.
NSString *lowercasePhrase = searchPhrase.lowercaseString;
const char *sPhrase = lowercasePhrase.UTF8String;
if (sPhrase == NULL) {
    // nil phrase: upload an empty C string instead of handing Metal a NULL pointer.
    sPhrase = "";
}
const uint sPhraseLenght =
    (uint)[lowercasePhrase lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
const uint charsPerRow = (uint)_charTable->charsPerRow;
const uint rowCount = (uint)_charTable->rowCount;

[_mtlCommandQueue insertDebugCaptureBoundary];

id<MTLCommandBuffer> commandBuffer = [_mtlCommandQueue commandBuffer];
id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
[commandEncoder setComputePipelineState:_mtlComputePipelineState];

// Kernel argument table: 0 = char table, 1 = chars per row, 2 = row count,
// 3 = phrase bytes, 4 = phrase length, 5 = search masks.
[commandEncoder setBuffer:_mtlCharTable offset:0 atIndex:0];
[commandEncoder setBytes:&charsPerRow length:sizeof(uint) atIndex:1];
[commandEncoder setBytes:&rowCount length:sizeof(uint) atIndex:2];
// The trailing NUL is uploaded too (+1) so an empty phrase never yields a zero-length copy;
// the kernel still gets the real phrase length at index 4.
[commandEncoder setBytes:sPhrase length:(sPhraseLenght + 1) * sizeof(char) atIndex:3];
[commandEncoder setBytes:&sPhraseLenght length:sizeof(uint) atIndex:4];
[commandEncoder setBuffer:_mtlSearchMasks offset:0 atIndex:5];

[commandEncoder dispatchThreadgroups:m_ThreadgroupCount
               threadsPerThreadgroup:m_ThreadgroupSize];
[commandEncoder endEncoding];

// Ticks spent on CPU-side setup and encoding only.
uint64_t duratione = mach_absolute_time() - ts;

[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> cmdb) {
    // Let the next search proceed.
    // NOTE(review): this is signalled before the consolidation step below; if that step reads
    // _mtlSearchMasks, a following search could overwrite it concurrently — confirm.
    dispatch_semaphore_signal(inflightSemaphore);

    // Total ticks from the start of the search to command-buffer completion.
    uint64_t duration = mach_absolute_time() - ts;
    // consolidate:
    ...
    NSLog(@"status: %lu duration: %f sec (encoding: %f sec) | search count: %i", (unsigned long)cmdb.status, cpuTicksToMiliseconds(duration) / 1000.0f, cpuTicksToMiliseconds(duratione) / 1000.0f, searchCount);

    // Deliver the result on the main queue; calling a nil block would crash, so guard it.
    dispatch_async(dispatch_get_main_queue(), ^{
        if (completionBlock) {
            completionBlock(si, YES);
        }
    });
}];

// Commit the command buffer
[commandBuffer commit];
// Same queue as the opening capture boundary above.
[_mtlCommandQueue insertDebugCaptureBoundary];
@phlasserre
Copy link

thx for sharing this - that did help me ... even if 4 years after the post ;-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment