Skip to content

Instantly share code, notes, and snippets.

@vhaasteren
Created February 14, 2024 18:29
Show Gist options
  • Save vhaasteren/8978208d39edbdef131ce4b7babaa7a2 to your computer and use it in GitHub Desktop.
Save vhaasteren/8978208d39edbdef131ce4b7babaa7a2 to your computer and use it in GitHub Desktop.
Cholesky Decomposition on Metal
// CholeskyWrapper.h
#import <Foundation/Foundation.h>
@interface CholeskyWrapper : NSObject
- (void)performCholeskyDecompositionWithMatrix:(float *)matrix size:(NSUInteger)size;
@end
// CholeskyWrapper.m
// Compile with: clang -fobjc-arc -framework Foundation -framework Metal -framework MetalPerformanceShaders -framework CoreGraphics CholeskyWrapper.m -o libCholeskyWrapper.dylib -dynamiclib
#import "CholeskyWrapper.h"
#import <Metal/Metal.h>
#import <MetalPerformanceShaders/MetalPerformanceShaders.h>
#ifdef __cplusplus
extern "C" {
#endif
__attribute__((visibility("default")))
void C_performCholeskyDecompositionWithMatrix(float* matrix, size_t size);
#ifdef __cplusplus
}
#endif
void C_performCholeskyDecompositionWithMatrix(float* matrix, size_t size) {
CholeskyWrapper* wrapper = [[CholeskyWrapper alloc] init];
[wrapper performCholeskyDecompositionWithMatrix:matrix size:size];
}
@implementation CholeskyWrapper {
id<MTLDevice> _device;
id<MTLCommandQueue> _commandQueue;
}
- (instancetype)init {
self = [super init];
if (self) {
// Initialize the Metal device
_device = MTLCreateSystemDefaultDevice();
if (!_device) {
NSLog(@"Metal is not supported on this device");
return nil;
}
// Create a command queue
_commandQueue = [_device newCommandQueue];
}
return self;
}
- (void)performCholeskyDecompositionWithMatrix:(float *)matrix size:(NSUInteger)size {
// Ensure the Metal device and command queue are set up
if (!_device || !_commandQueue) {
NSLog(@"Metal device or command queue not properly initialized");
return;
}
// Create a buffer for the matrix data
NSUInteger matrixSize = size * size * sizeof(float);
id<MTLBuffer> matrixBuffer = [_device newBufferWithBytes:matrix
length:matrixSize
options:MTLResourceStorageModeShared];
// Create MPS matrix object
MPSMatrixDescriptor *descriptor = [MPSMatrixDescriptor matrixDescriptorWithDimensions:size
columns:size
rowBytes:size * sizeof(float)
dataType:MPSDataTypeFloat32];
MPSMatrix *mpsMatrix = [[MPSMatrix alloc] initWithBuffer:matrixBuffer descriptor:descriptor];
// MPSMatrixDecompositionCholesky object for performing the decomposition
MPSMatrixDecompositionCholesky *choleskyDecomposition = [[MPSMatrixDecompositionCholesky alloc] initWithDevice:_device lower:YES order:size];
// Create a command buffer to encode the operation
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
// Allocate a buffer for the status
id<MTLBuffer> statusBuffer = [_device newBufferWithLength:sizeof(MPSMatrixDecompositionStatus)
options:MTLResourceStorageModeShared];
// Corrected encodeToCommandBuffer call with the status buffer
[choleskyDecomposition encodeToCommandBuffer:commandBuffer
sourceMatrix:mpsMatrix
resultMatrix:mpsMatrix
status:statusBuffer];
// Commit the command buffer and wait for completion
[commandBuffer commit];
[commandBuffer waitUntilCompleted];
// Read back the status from the buffer
MPSMatrixDecompositionStatus *statusPointer = (MPSMatrixDecompositionStatus *)statusBuffer.contents;
MPSMatrixDecompositionStatus operationStatus = *statusPointer;
// TODO: Check the status here more appropriately
if (operationStatus == MPSMatrixDecompositionStatusSuccess) {
NSLog(@"Cholesky decomposition completed successfully.");
} else if (operationStatus == MPSMatrixDecompositionStatusNonPositiveDefinite) {
NSLog(@"The matrix is not numerically positive definite.");
// Return an error value?
return;
} else {
NSLog(@"An unknown error occurred during Cholesky decomposition.");
// Return another error value?
return;
}
// Copy the matrix back in-place
memcpy(matrix, matrixBuffer.contents, matrixSize);
}
@end
# CholeskyWrapperTest.py
import numpy as np
import ctypes
# Load the shared library
lib = ctypes.CDLL('./libCholeskyWrapper.dylib')
# Define the function prototype
cholesky_decomp = lib.C_performCholeskyDecompositionWithMatrix
cholesky_decomp.argtypes = [np.ctypeslib.ndpointer(dtype=np.float32, ndim=2, flags="C_CONTIGUOUS"), ctypes.c_size_t]
cholesky_decomp.restype = None
def perform_cholesky(matrix):
if not matrix.flags['C_CONTIGUOUS']:
matrix = np.ascontiguousarray(matrix, dtype=np.float32)
else:
matrix = matrix.astype(np.float32, copy=False)
# Perform the decomposition in-place
cholesky_decomp(matrix, matrix.shape[0])
return matrix
# Example usage
n = 3
A = np.array([[4, 12, -16], [12, 37, -43], [-16, -43, 98]], dtype=np.float32)
A_decomposed = perform_cholesky(A)
print(A_decomposed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment