SharanSMenon/MetalAdder.h

## addShader.metal
//
//  addShader.metal
//  MetalLearn1
//
//  Created by Sharan Sajiv Menon on 3/22/22.
//

#include <metal_stdlib>
using namespace metal;

// Element-wise addition kernel.
// Each thread reads one float from inA and one from inB at its own grid
// position and stores their sum at the same position in result.
// No bounds check is performed on index.
kernel void addShader(device const float* inA,
                      device const float* inB,
                      device float* result,
                      uint index [[thread_position_in_grid]]) {
    const float lhs = inA[index];
    const float rhs = inB[index];
    result[index] = lhs + rhs;
}


## main.m
//
//  main.m
//  MetalLearn1
//
//  Created by Sharan Sajiv Menon on 3/22/22.
//

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import "MetalAdder.h"


/// Program entry point.
/// Obtains the default Metal device, builds a MetalAdder on it, prepares the
/// input data and runs the compute pass.
/// @return 0 on success, 1 if no Metal device or no MetalAdder could be obtained.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSLog(@"Program Started");

        // MTLCreateSystemDefaultDevice() returns nil when the system has no
        // usable Metal device; bail out instead of continuing with nil.
        id<MTLDevice> device = MTLCreateSystemDefaultDevice();
        if (device == nil) {
            NSLog(@"Metal is not supported on this system.");
            return 1;
        }

        MetalAdder *madder = [[MetalAdder alloc] initWithDevice:device];
        if (madder == nil) {
            NSLog(@"Failed to create MetalAdder.");
            return 1;
        }

        [madder prepareData];
        NSLog(@"Data prepared");
        [madder sendComputeCommand];

        NSLog(@"Program Completed");
    }
    return 0;
}

## MetalAdder.h
//
//  MetalAdder.h
//  MetalLearn1
//
//  Created by Sharan Sajiv Menon on 4/3/22.
//
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>

#ifndef MetalAdder_h
#define MetalAdder_h

// Header file for MetalAdder.
/// Adds two arrays of floats element by element on the GPU using Metal.
@interface MetalAdder : NSObject

/// Creates an adder that builds its pipeline state and command queue on the given device.
/// @param device The Metal device used for all GPU work.
- (instancetype)initWithDevice:(id<MTLDevice>)device;

/// Allocates the two input buffers and the result buffer, then fills the
/// inputs with random floats.
- (void)prepareData;

/// Encodes and runs the add kernel, waits for it to finish, and verifies the
/// result on the CPU.
- (void)sendComputeCommand;

@end

#endif /* MetalAdder_h */

## MetalAdder.m
//
//  MetalAdder.m
//  MetalLearn1
//
//  Created by Sharan Sajiv Menon on 4/3/22.
//

#import "MetalAdder.h"

// Number of float elements held by each buffer: 2^24 = 16777216.
const unsigned int arrayLength = 1U << 24;
// Size in bytes of one buffer holding arrayLength floats.
const unsigned int bufferSize = sizeof(float) * arrayLength;

// Implementation of MetalAdder: owns the Metal objects needed to run the
// addShader kernel over three shared-storage buffers of arrayLength floats.
@implementation MetalAdder
{
    // Device passed in by the caller; every other Metal object is created from it.
    id<MTLDevice> _mDevice;
    // Pipeline state built from the addShader kernel function.
    id<MTLComputePipelineState> _mAddFunctionPSO;
    // Queue from which command buffers are created.
    id<MTLCommandQueue> _mCommandQueue;
    // Input buffers (A, B) and output buffer (Res); allocated in -prepareData.
    id<MTLBuffer> _mBufferA;
    id<MTLBuffer> _mBufferB;
    id<MTLBuffer> _mBufferRes;
}

/// Builds the compute pipeline state for the addShader kernel and a command queue.
/// @param device The Metal device used for all GPU work.
/// @return The initialized adder, or nil if the device is nil or any Metal
///         object (library, function, pipeline state, queue) could not be created.
- (instancetype)initWithDevice:(id<MTLDevice>)device
{
    self = [super init];
    if (self) {
        if (device == nil) {
            NSLog(@"MetalAdder: no Metal device was provided.");
            return nil;
        }
        _mDevice = device;

        // The kernel lives in addShader.metal and is named addShader.
        id<MTLLibrary> defaultLibrary = [_mDevice newDefaultLibrary];
        if (defaultLibrary == nil) {
            NSLog(@"MetalAdder: failed to load the default Metal library.");
            return nil;
        }

        id<MTLFunction> addFunction = [defaultLibrary newFunctionWithName:@"addShader"];
        if (addFunction == nil) {
            NSLog(@"MetalAdder: failed to find the addShader function.");
            return nil;
        }

        // Check the returned object, not the error pointer, to detect failure.
        NSError *error = nil;
        _mAddFunctionPSO = [_mDevice newComputePipelineStateWithFunction:addFunction
                                                                   error:&error];
        if (_mAddFunctionPSO == nil) {
            NSLog(@"MetalAdder: failed to create the pipeline state: %@", error);
            return nil;
        }

        _mCommandQueue = [_mDevice newCommandQueue];
        if (_mCommandQueue == nil) {
            NSLog(@"MetalAdder: failed to create the command queue.");
            return nil;
        }
    }
    return self;
}

/// Encodes the add kernel, submits it, blocks until the GPU has finished,
/// and then verifies the result on the CPU.
/// Does nothing (other than logging) if the buffers have not been allocated
/// or if the command buffer does not complete successfully.
- (void)sendComputeCommand {
    if (_mBufferA == nil || _mBufferB == nil || _mBufferRes == nil) {
        NSLog(@"MetalAdder: buffers are not allocated; call prepareData first.");
        return;
    }

    id<MTLCommandBuffer> commandBuffer = [_mCommandQueue commandBuffer];
    id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
    if (computeEncoder == nil) {
        NSLog(@"MetalAdder: failed to create a compute command encoder.");
        return;
    }

    // Bind the pipeline and buffers and record the dispatch.
    [self encodeAddCommand:computeEncoder];

    // Submit the work and wait for the GPU.
    [computeEncoder endEncoding];
    [commandBuffer commit];
    [commandBuffer waitUntilCompleted];

    // Verifying a failed run would only report meaningless mismatches.
    if (commandBuffer.status != MTLCommandBufferStatusCompleted) {
        NSLog(@"MetalAdder: command buffer did not complete: %@", commandBuffer.error);
        return;
    }

    [self verifyResults];
}

/// Records the pipeline state, the three buffer bindings and the dispatch
/// into the given encoder.
/// @param computeEncoder The encoder to record into; the caller ends encoding.
- (void)encodeAddCommand:(id<MTLComputeCommandEncoder>)computeEncoder {
    [computeEncoder setComputePipelineState:_mAddFunctionPSO];
    // Indices 0, 1, 2 correspond to inA, inB and result in addShader.
    [computeEncoder setBuffer:_mBufferA offset:0 atIndex:0];
    [computeEncoder setBuffer:_mBufferB offset:0 atIndex:1];
    [computeEncoder setBuffer:_mBufferRes offset:0 atIndex:2];

    // One thread per array element.
    MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1);

    // Use the largest threadgroup the pipeline allows, capped at the array length.
    NSUInteger threadsPerGroup = _mAddFunctionPSO.maxTotalThreadsPerThreadgroup;
    if (threadsPerGroup > arrayLength) {
        threadsPerGroup = arrayLength;
    }
    MTLSize threadgroupSize = MTLSizeMake(threadsPerGroup, 1, 1);

    [computeEncoder dispatchThreads:gridSize threadsPerThreadgroup:threadgroupSize];
}

/// Allocates the two input buffers and the result buffer (bufferSize bytes
/// each, shared storage) and fills the inputs with random floats.
/// If any allocation fails, all three buffers are reset to nil.
- (void)prepareData {
    NSLog(@"Arraylength: %u", arrayLength);

    _mBufferA = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
    _mBufferB = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
    _mBufferRes = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];

    if (_mBufferA == nil || _mBufferB == nil || _mBufferRes == nil) {
        NSLog(@"MetalAdder: failed to allocate buffers of %u bytes.", bufferSize);
        _mBufferA = nil;
        _mBufferB = nil;
        _mBufferRes = nil;
        return;
    }

    [self generateRandomFloatData:_mBufferA];
    [self generateRandomFloatData:_mBufferB];
}

/// Fills the first arrayLength floats of the buffer with values from
/// rand() scaled by RAND_MAX, i.e. in the range [0, 1].
/// @param buffer The buffer to fill; ignored if it has no CPU-accessible contents.
- (void)generateRandomFloatData:(id<MTLBuffer>)buffer {
    float *dataPtr = buffer.contents;
    if (dataPtr == NULL) {
        // A nil buffer yields a NULL contents pointer; writing through it would crash.
        return;
    }
    for (unsigned long i = 0; i < arrayLength; i++) {
        dataPtr[i] = (float)rand() / (float)(RAND_MAX);
    }
}

/// Recomputes A[i] + B[i] on the CPU for every element, prints each element
/// whose GPU result differs, and logs the total number of mismatches.
- (void)verifyResults {
    float *A = _mBufferA.contents;
    float *B = _mBufferB.contents;
    float *C = _mBufferRes.contents;
    if (A == NULL || B == NULL || C == NULL) {
        NSLog(@"MetalAdder: buffers are not available for verification.");
        return;
    }

    long errors = 0;

    for (unsigned long i = 0; i < arrayLength; i++) {
        if (C[i] != (A[i] + B[i])) {
            printf("Compute ERROR: index=%lu result=%g vs %g=a+b\n",
                   i, C[i], A[i] + B[i]);
            errors++;
        }
    }
    NSLog(@"Finished verification");
    NSLog(@"%ld errors found.", errors);
}

@end
	//
	// addShader.metal
	// MetalLearn1
	//
	// Created by Sharan Sajiv Menon on 3/22/22.
	//

	#include <metal_stdlib>
	using namespace metal;

	// Element-wise addition kernel.
	// Each thread reads one float from inA and one from inB at its own grid
	// position and stores their sum at the same position in result.
	// No bounds check is performed on index.
	kernel void addShader(device const float* inA,
	                      device const float* inB,
	                      device float* result,
	                      uint index [[thread_position_in_grid]]) {
	    const float lhs = inA[index];
	    const float rhs = inB[index];
	    result[index] = lhs + rhs;
	}
	//
	// main.m
	// MetalLearn1
	//
	// Created by Sharan Sajiv Menon on 3/22/22.
	//

	#import <Foundation/Foundation.h>
	#import <Metal/Metal.h>
	#import "MetalAdder.h"


	/// Program entry point.
	/// Obtains the default Metal device, builds a MetalAdder on it, prepares the
	/// input data and runs the compute pass.
	/// @return 0 on success, 1 if no Metal device or no MetalAdder could be obtained.
	int main(int argc, const char * argv[]) {
	    @autoreleasepool {
	        NSLog(@"Program Started");

	        // MTLCreateSystemDefaultDevice() returns nil when the system has no
	        // usable Metal device; bail out instead of continuing with nil.
	        id<MTLDevice> device = MTLCreateSystemDefaultDevice();
	        if (device == nil) {
	            NSLog(@"Metal is not supported on this system.");
	            return 1;
	        }

	        MetalAdder *madder = [[MetalAdder alloc] initWithDevice:device];
	        if (madder == nil) {
	            NSLog(@"Failed to create MetalAdder.");
	            return 1;
	        }

	        [madder prepareData];
	        NSLog(@"Data prepared");
	        [madder sendComputeCommand];

	        NSLog(@"Program Completed");
	    }
	    return 0;
	}
	//
	// MetalAdder.h
	// MetalLearn1
	//
	// Created by Sharan Sajiv Menon on 4/3/22.
	//
	#import <Foundation/Foundation.h>
	#import <Metal/Metal.h>

	#ifndef MetalAdder_h
	#define MetalAdder_h

	// Header file for MetalAdder.
	/// Adds two arrays of floats element by element on the GPU using Metal.
	@interface MetalAdder : NSObject

	/// Creates an adder that builds its pipeline state and command queue on the given device.
	/// @param device The Metal device used for all GPU work.
	- (instancetype)initWithDevice:(id<MTLDevice>)device;

	/// Allocates the two input buffers and the result buffer, then fills the
	/// inputs with random floats.
	- (void)prepareData;

	/// Encodes and runs the add kernel, waits for it to finish, and verifies the
	/// result on the CPU.
	- (void)sendComputeCommand;

	@end

	#endif /* MetalAdder_h */
	//
	// MetalAdder.m
	// MetalLearn1
	//
	// Created by Sharan Sajiv Menon on 4/3/22.
	//

	#import "MetalAdder.h"

	// Number of float elements held by each buffer: 2^24 = 16777216.
	const unsigned int arrayLength = 1U << 24;
	// Size in bytes of one buffer holding arrayLength floats.
	const unsigned int bufferSize = sizeof(float) * arrayLength;

	// Implementation of MetalAdder: owns the Metal objects needed to run the
	// addShader kernel over three shared-storage buffers of arrayLength floats.
	@implementation MetalAdder
	{
	    // Device passed in by the caller; every other Metal object is created from it.
	    id<MTLDevice> _mDevice;
	    // Pipeline state built from the addShader kernel function.
	    id<MTLComputePipelineState> _mAddFunctionPSO;
	    // Queue from which command buffers are created.
	    id<MTLCommandQueue> _mCommandQueue;
	    // Input buffers (A, B) and output buffer (Res); allocated in -prepareData.
	    id<MTLBuffer> _mBufferA;
	    id<MTLBuffer> _mBufferB;
	    id<MTLBuffer> _mBufferRes;
	}

	/// Builds the compute pipeline state for the addShader kernel and a command queue.
	/// @param device The Metal device used for all GPU work.
	/// @return The initialized adder, or nil if the device is nil or any Metal
	///         object (library, function, pipeline state, queue) could not be created.
	- (instancetype)initWithDevice:(id<MTLDevice>)device
	{
	    self = [super init];
	    if (self) {
	        if (device == nil) {
	            NSLog(@"MetalAdder: no Metal device was provided.");
	            return nil;
	        }
	        _mDevice = device;

	        // The kernel lives in addShader.metal and is named addShader.
	        id<MTLLibrary> defaultLibrary = [_mDevice newDefaultLibrary];
	        if (defaultLibrary == nil) {
	            NSLog(@"MetalAdder: failed to load the default Metal library.");
	            return nil;
	        }

	        id<MTLFunction> addFunction = [defaultLibrary newFunctionWithName:@"addShader"];
	        if (addFunction == nil) {
	            NSLog(@"MetalAdder: failed to find the addShader function.");
	            return nil;
	        }

	        // Check the returned object, not the error pointer, to detect failure.
	        NSError *error = nil;
	        _mAddFunctionPSO = [_mDevice newComputePipelineStateWithFunction:addFunction
	                                                                   error:&error];
	        if (_mAddFunctionPSO == nil) {
	            NSLog(@"MetalAdder: failed to create the pipeline state: %@", error);
	            return nil;
	        }

	        _mCommandQueue = [_mDevice newCommandQueue];
	        if (_mCommandQueue == nil) {
	            NSLog(@"MetalAdder: failed to create the command queue.");
	            return nil;
	        }
	    }
	    return self;
	}

	/// Encodes the add kernel, submits it, blocks until the GPU has finished,
	/// and then verifies the result on the CPU.
	/// Does nothing (other than logging) if the buffers have not been allocated
	/// or if the command buffer does not complete successfully.
	- (void)sendComputeCommand {
	    if (_mBufferA == nil || _mBufferB == nil || _mBufferRes == nil) {
	        NSLog(@"MetalAdder: buffers are not allocated; call prepareData first.");
	        return;
	    }

	    id<MTLCommandBuffer> commandBuffer = [_mCommandQueue commandBuffer];
	    id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
	    if (computeEncoder == nil) {
	        NSLog(@"MetalAdder: failed to create a compute command encoder.");
	        return;
	    }

	    // Bind the pipeline and buffers and record the dispatch.
	    [self encodeAddCommand:computeEncoder];

	    // Submit the work and wait for the GPU.
	    [computeEncoder endEncoding];
	    [commandBuffer commit];
	    [commandBuffer waitUntilCompleted];

	    // Verifying a failed run would only report meaningless mismatches.
	    if (commandBuffer.status != MTLCommandBufferStatusCompleted) {
	        NSLog(@"MetalAdder: command buffer did not complete: %@", commandBuffer.error);
	        return;
	    }

	    [self verifyResults];
	}

	/// Records the pipeline state, the three buffer bindings and the dispatch
	/// into the given encoder.
	/// @param computeEncoder The encoder to record into; the caller ends encoding.
	- (void)encodeAddCommand:(id<MTLComputeCommandEncoder>)computeEncoder {
	    [computeEncoder setComputePipelineState:_mAddFunctionPSO];
	    // Indices 0, 1, 2 correspond to inA, inB and result in addShader.
	    [computeEncoder setBuffer:_mBufferA offset:0 atIndex:0];
	    [computeEncoder setBuffer:_mBufferB offset:0 atIndex:1];
	    [computeEncoder setBuffer:_mBufferRes offset:0 atIndex:2];

	    // One thread per array element.
	    MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1);

	    // Use the largest threadgroup the pipeline allows, capped at the array length.
	    NSUInteger threadsPerGroup = _mAddFunctionPSO.maxTotalThreadsPerThreadgroup;
	    if (threadsPerGroup > arrayLength) {
	        threadsPerGroup = arrayLength;
	    }
	    MTLSize threadgroupSize = MTLSizeMake(threadsPerGroup, 1, 1);

	    [computeEncoder dispatchThreads:gridSize threadsPerThreadgroup:threadgroupSize];
	}

	/// Allocates the two input buffers and the result buffer (bufferSize bytes
	/// each, shared storage) and fills the inputs with random floats.
	/// If any allocation fails, all three buffers are reset to nil.
	- (void)prepareData {
	    NSLog(@"Arraylength: %u", arrayLength);

	    _mBufferA = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
	    _mBufferB = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
	    _mBufferRes = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];

	    if (_mBufferA == nil || _mBufferB == nil || _mBufferRes == nil) {
	        NSLog(@"MetalAdder: failed to allocate buffers of %u bytes.", bufferSize);
	        _mBufferA = nil;
	        _mBufferB = nil;
	        _mBufferRes = nil;
	        return;
	    }

	    [self generateRandomFloatData:_mBufferA];
	    [self generateRandomFloatData:_mBufferB];
	}

	/// Fills the first arrayLength floats of the buffer with values from
	/// rand() scaled by RAND_MAX, i.e. in the range [0, 1].
	/// @param buffer The buffer to fill; ignored if it has no CPU-accessible contents.
	- (void)generateRandomFloatData:(id<MTLBuffer>)buffer {
	    float *dataPtr = buffer.contents;
	    if (dataPtr == NULL) {
	        // A nil buffer yields a NULL contents pointer; writing through it would crash.
	        return;
	    }
	    for (unsigned long i = 0; i < arrayLength; i++) {
	        dataPtr[i] = (float)rand() / (float)(RAND_MAX);
	    }
	}

	/// Recomputes A[i] + B[i] on the CPU for every element, prints each element
	/// whose GPU result differs, and logs the total number of mismatches.
	- (void)verifyResults {
	    float *A = _mBufferA.contents;
	    float *B = _mBufferB.contents;
	    float *C = _mBufferRes.contents;
	    if (A == NULL || B == NULL || C == NULL) {
	        NSLog(@"MetalAdder: buffers are not available for verification.");
	        return;
	    }

	    long errors = 0;

	    for (unsigned long i = 0; i < arrayLength; i++) {
	        if (C[i] != (A[i] + B[i])) {
	            printf("Compute ERROR: index=%lu result=%g vs %g=a+b\n",
	                   i, C[i], A[i] + B[i]);
	            errors++;
	        }
	    }
	    NSLog(@"Finished verification");
	    NSLog(@"%ld errors found.", errors);
	}

	@end