Skip to content

Instantly share code, notes, and snippets.

@SharanSMenon
Created April 8, 2022 23:39
Show Gist options
  • Save SharanSMenon/10fa4750a8edb4cfd69028f627a6adec to your computer and use it in GitHub Desktop.
Save SharanSMenon/10fa4750a8edb4cfd69028f627a6adec to your computer and use it in GitHub Desktop.
Adding two arrays on the GPU with Metal. Borrowed from the Apple Developer documentation.
//
// addShader.metal
// MetalLearn1
//
// Created by Sharan Sajiv Menon on 3/22/22.
//
#include <metal_stdlib>
using namespace metal;
// Element-wise addition kernel.
// Each thread handles exactly one element: it reads inA[index] and inB[index]
// and stores their sum in result[index]. `index` is this thread's position in
// the dispatched grid, so the host must dispatch one thread per array element.
kernel void addShader(device const float* inA,
                      device const float* inB,
                      device float* result,
                      uint index [[thread_position_in_grid]])
{
    const float lhs = inA[index];
    const float rhs = inB[index];
    result[index] = lhs + rhs;
}
//
// main.m
// MetalLearn1
//
// Created by Sharan Sajiv Menon on 3/22/22.
//
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import "MetalAdder.h"
// Entry point: creates the default Metal device, builds a MetalAdder on it,
// fills the input buffers and runs the add kernel once.
// Returns 0 on success and 1 when no Metal device is available or the adder
// could not be initialized.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSLog(@"Program Started");

        // MTLCreateSystemDefaultDevice() returns nil when the system has no
        // usable Metal device; nothing below can work without one.
        id<MTLDevice> device = MTLCreateSystemDefaultDevice();
        if (device == nil) {
            NSLog(@"No Metal-capable device is available.");
            return 1;
        }

        MetalAdder *madder = [[MetalAdder alloc] initWithDevice:device];
        if (madder == nil) {
            // Messaging nil would silently do nothing, so fail loudly instead
            // of reporting a successful run.
            NSLog(@"Failed to initialize MetalAdder.");
            return 1;
        }

        [madder prepareData];
        NSLog(@"Data prepared");

        [madder sendComputeCommand];
        NSLog(@"Program Completed");
    }
    return 0;
}
//
// MetalAdder.h
// MetalLearn1
//
// Created by Sharan Sajiv Menon on 4/3/22.
//
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#ifndef MetalAdder_h
#define MetalAdder_h
// Header file for MetalAdder.
@interface MetalAdder : NSObject

// Public API: only these three methods are exposed.

// Creates an adder bound to `device`. Looks up the "addShader" kernel in the
// device's default library, builds a compute pipeline state for it and creates
// a command queue.
- (instancetype)initWithDevice:(id<MTLDevice>)device;

// Allocates the two input buffers and the result buffer, then fills the two
// inputs with random floats.
- (void)prepareData;

// Encodes the add kernel, submits it to the GPU, waits for it to finish and
// then verifies the result on the CPU.
- (void)sendComputeCommand;

@end
#endif /* MetalAdder_h */
//
// MetalAdder.m
// MetalLearn1
//
// Created by Sharan Sajiv Menon on 4/3/22.
//
#import "MetalAdder.h"
// Number of elements in each array: 2^24 = 16777216 floats.
const unsigned int arrayLength = 1u << 24;
// Size in bytes of one buffer that holds arrayLength floats.
const unsigned int bufferSize = sizeof(float) * arrayLength;
@implementation MetalAdder
{
    // Metal objects used by this adder. The device is passed in by the caller.
    id<MTLDevice> _mDevice;
    // Compiled pipeline state for the "addShader" kernel.
    id<MTLComputePipelineState> _mAddFunctionPSO;
    // Queue from which command buffers are created.
    id<MTLCommandQueue> _mCommandQueue;
    // Two input buffers and the output buffer; allocated in -prepareData.
    id<MTLBuffer> _mBufferA;
    id<MTLBuffer> _mBufferB;
    id<MTLBuffer> _mBufferRes;
}

// Builds everything needed to run the add kernel on `device`.
// Returns nil (after logging the reason) if the device is nil, the default
// library or the "addShader" function cannot be found, the pipeline state
// cannot be created, or no command queue can be made.
- (instancetype)initWithDevice:(id<MTLDevice>)device
{
    self = [super init];
    if (self) {
        if (device == nil) {
            NSLog(@"MetalAdder: no Metal device was provided.");
            return nil;
        }
        _mDevice = device;

        // The default library holds the shaders compiled into the app bundle.
        id<MTLLibrary> defaultLibrary = [_mDevice newDefaultLibrary];
        if (defaultLibrary == nil) {
            NSLog(@"MetalAdder: failed to find the default library.");
            return nil;
        }

        // The kernel in addShader.metal is named "addShader".
        id<MTLFunction> addFunction = [defaultLibrary newFunctionWithName:@"addShader"];
        if (addFunction == nil) {
            NSLog(@"MetalAdder: failed to find the addShader function.");
            return nil;
        }

        // Check the returned object, not the error pointer, to detect failure.
        NSError *error = nil;
        _mAddFunctionPSO = [_mDevice newComputePipelineStateWithFunction:addFunction
                                                                   error:&error];
        if (_mAddFunctionPSO == nil) {
            NSLog(@"MetalAdder: failed to create pipeline state object, error %@.", error);
            return nil;
        }

        _mCommandQueue = [_mDevice newCommandQueue];
        if (_mCommandQueue == nil) {
            NSLog(@"MetalAdder: failed to create the command queue.");
            return nil;
        }
    }
    return self;
}

// Runs the add kernel once over the prepared buffers, blocks until the GPU
// finishes, and then verifies the output on the CPU. Does nothing (besides
// logging) if the buffers have not been prepared or the GPU reports an error.
- (void)sendComputeCommand {
    // Without buffers the kernel has nothing to read, and verification would
    // dereference NULL pointers.
    if (_mBufferA == nil || _mBufferB == nil || _mBufferRes == nil) {
        NSLog(@"MetalAdder: buffers are not allocated; call -prepareData first.");
        return;
    }

    id<MTLCommandBuffer> commandBuffer = [_mCommandQueue commandBuffer];
    id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
    if (computeEncoder == nil) {
        NSLog(@"MetalAdder: failed to create a compute command encoder.");
        return;
    }

    // Bind the pipeline and buffers and record the dispatch.
    [self encodeAddCommand:computeEncoder];

    // Finish encoding, submit the work and wait for the GPU.
    [computeEncoder endEncoding];
    [commandBuffer commit];
    [commandBuffer waitUntilCompleted];

    // If the command buffer failed, the result buffer contents are
    // meaningless, so report the failure instead of verifying.
    if (commandBuffer.status == MTLCommandBufferStatusError) {
        NSLog(@"MetalAdder: command buffer failed, error %@.", commandBuffer.error);
        return;
    }

    [self verifyResults];
}

// Records the add kernel dispatch into `computeEncoder`: sets the pipeline
// state, binds the buffers at indices 0 (A), 1 (B) and 2 (result), and
// dispatches one thread per array element.
- (void)encodeAddCommand:(id<MTLComputeCommandEncoder>)computeEncoder {
    [computeEncoder setComputePipelineState:_mAddFunctionPSO];
    [computeEncoder setBuffer:_mBufferA offset:0 atIndex:0];
    [computeEncoder setBuffer:_mBufferB offset:0 atIndex:1];
    [computeEncoder setBuffer:_mBufferRes offset:0 atIndex:2];

    // One-dimensional grid with one thread per element.
    MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1);

    // Use the largest threadgroup the pipeline allows, capped at the array length.
    NSUInteger threadsPerGroup = _mAddFunctionPSO.maxTotalThreadsPerThreadgroup;
    if (threadsPerGroup > arrayLength) {
        threadsPerGroup = arrayLength;
    }
    MTLSize threadgroupSize = MTLSizeMake(threadsPerGroup, 1, 1);

    [computeEncoder dispatchThreads:gridSize threadsPerThreadgroup:threadgroupSize];
}

// Allocates the three shared-storage buffers (bufferSize bytes each) and fills
// the two input buffers with random floats. If any allocation fails, all three
// buffers are reset to nil and no data is generated.
- (void)prepareData {
    // arrayLength is unsigned, so it is logged with %u.
    NSLog(@"Arraylength: %u", arrayLength);

    _mBufferA = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
    _mBufferB = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
    _mBufferRes = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];

    if (_mBufferA == nil || _mBufferB == nil || _mBufferRes == nil) {
        // A nil buffer has NULL contents; writing into it would crash.
        NSLog(@"MetalAdder: failed to allocate %u-byte buffers.", bufferSize);
        _mBufferA = nil;
        _mBufferB = nil;
        _mBufferRes = nil;
        return;
    }

    [self generateRandomFloatData:_mBufferA];
    [self generateRandomFloatData:_mBufferB];
}

// Fills the first arrayLength floats of `buffer` with values in [0, 1]
// produced by rand().
- (void)generateRandomFloatData:(id<MTLBuffer>)buffer {
    float *dataPtr = buffer.contents;
    for (unsigned long i = 0; i < arrayLength; i++) {
        dataPtr[i] = (float)rand() / (float)(RAND_MAX);
    }
}

// Recomputes A[i] + B[i] on the CPU for every element and compares it with the
// GPU result, printing each mismatch and logging the total mismatch count.
- (void)verifyResults {
    float *A = _mBufferA.contents;
    float *B = _mBufferB.contents;
    float *C = _mBufferRes.contents;

    long errors = 0;
    for (unsigned long i = 0; i < arrayLength; i++) {
        if (C[i] != (A[i] + B[i])) {
            printf("Compute ERROR: index=%lu result=%g vs %g=a+b\n",
                   i, C[i], A[i] + B[i]);
            errors++;
        }
    }

    NSLog(@"Finished verification");
    NSLog(@"%ld errors found.", errors);
}

@end
@SharanSMenon
Copy link
Author

SharanSMenon commented Apr 19, 2022

I plan on creating Swift and C++ (metal-cpp) versions soon, alongside this Objective-C version.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment