Skip to content

Instantly share code, notes, and snippets.

@NSG650
Created April 22, 2024 11:38
Show Gist options
  • Save NSG650/705627edbce9c8ffaf4d151cdf649c39 to your computer and use it in GitHub Desktop.
Save NSG650/705627edbce9c8ffaf4d151cdf649c39 to your computer and use it in GitHub Desktop.
Multi-threaded matrix multiplication using the NT native API
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <stdio.h>
#include "native.h"
#include "random.h"
/*
 * Dense matrix of doubles stored as a table of row pointers.
 * MatrixNew() allocates `rows` row pointers, each pointing to `columns`
 * doubles, so elements are addressed as matrix[row][column].
 */
typedef struct _MATRIX {
double **matrix; /* matrix[i] is the storage for row i */
ULONG rows; /* number of entries in `matrix` */
ULONG columns; /* number of doubles in each row */
} MATRIX, *PMATRIX;
/*
 * Work item handed to one MatrixMultiplicationWorker invocation.
 * It names the single output cell (row, column) to compute, the two operand
 * matrices, and the matrix that receives the result. MatrixMultiply()
 * heap-allocates one of these per cell and the worker frees it when done.
 */
typedef struct _MATRIX_MULTIPLICATION_HANDOVER {
ULONG row; /* row index of the output cell */
ULONG column; /* column index of the output cell */
PMATRIX MatrixOne; /* left operand (read only by the worker) */
PMATRIX MatrixTwo; /* right operand (read only by the worker) */
PMATRIX WorkingMatrix; /* destination; the worker writes matrix[row][column] */
} MATRIX_MULTIPLICATION_HANDOVER, *PMATRIX_MULTIPLICATION_HANDOVER;
VOID MatrixMultiplicationWorker(LPVOID Args) {
PMATRIX_MULTIPLICATION_HANDOVER Handover = (PMATRIX_MULTIPLICATION_HANDOVER)Args;
double t = 0;
for (ULONG k = 0; k < Handover->MatrixOne->rows; k++) {
t += Handover->MatrixOne->matrix[Handover->row][k] * Handover->MatrixTwo->matrix[k][Handover->column];
}
Handover->WorkingMatrix->matrix[Handover->row][Handover->column] = t;
RtlFreeHeap(GetProcessHeap(), 0, Handover);
}
/*
 * Prints a matrix to stdout, one row per line, each element formatted with
 * two decimals and followed by a space. A NULL matrix prints nothing.
 */
VOID MatrixDump(PMATRIX Matrix) {
    if (Matrix == NULL) {
        return;
    }

    ULONG RowIndex = 0;
    while (RowIndex < Matrix->rows) {
        ULONG ColumnIndex = 0;
        while (ColumnIndex < Matrix->columns) {
            printf("%.2f ", Matrix->matrix[RowIndex][ColumnIndex]);
            ++ColumnIndex;
        }
        printf("\n");
        ++RowIndex;
    }
}
/*
 * Allocates a zero-filled Rows x Columns matrix on the process heap.
 *
 * Returns the new matrix, or NULL if any allocation fails or the requested
 * size does not fit in SIZE_T. On failure everything allocated so far is
 * released, so the caller never receives a partially built matrix.
 */
PMATRIX MatrixNew(ULONG Rows, ULONG Columns) {
    const SIZE_T MaxSize = (SIZE_T)-1;

    /* Reject sizes whose byte count would wrap around (possible on 32-bit). */
    if (Rows > MaxSize / sizeof(double *) || Columns > MaxSize / sizeof(double)) {
        return NULL;
    }

    PMATRIX New = RtlAllocateHeap(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(MATRIX));
    if (!New) {
        return NULL;
    }
    New->rows = Rows;
    New->columns = Columns;

    New->matrix = (double **)RtlAllocateHeap(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(double *) * Rows);
    if (!New->matrix) {
        RtlFreeHeap(GetProcessHeap(), 0, New);
        return NULL;
    }

    for (ULONG i = 0; i < Rows; i++) {
        New->matrix[i] = (double *)RtlAllocateHeap(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(double) * Columns);
        if (!New->matrix[i]) {
            /* Unwind: free the rows that were already allocated, then the shell. */
            for (ULONG j = 0; j < i; j++) {
                RtlFreeHeap(GetProcessHeap(), 0, New->matrix[j]);
            }
            RtlFreeHeap(GetProcessHeap(), 0, New->matrix);
            RtlFreeHeap(GetProcessHeap(), 0, New);
            return NULL;
        }
    }
    return New;
}
/*
 * Copies Matrix->rows x Matrix->columns elements from a raw array of row
 * pointers into an existing matrix. Does nothing if either argument is NULL.
 * The source must provide at least as many rows and columns as the
 * destination declares; no bounds information is available for it.
 */
VOID MatrixCopyDoubleMatrixToMatrix(double **DoubleMatrix, PMATRIX Matrix) {
    if (Matrix == NULL || DoubleMatrix == NULL) {
        return;
    }

    for (ULONG RowIndex = 0; RowIndex < Matrix->rows; ++RowIndex) {
        for (ULONG ColumnIndex = 0; ColumnIndex < Matrix->columns; ++ColumnIndex) {
            const double Value = DoubleMatrix[RowIndex][ColumnIndex];
            Matrix->matrix[RowIndex][ColumnIndex] = Value;
        }
    }
}
/*
 * Fills every element of Matrix with a pseudo-random whole number drawn from
 * the inclusive range [Low, High], using Random().
 *
 * If the bounds are passed in the wrong order they are swapped. A NULL matrix
 * is ignored. The previous formula, (Random() % High) + Low, only produced the
 * intended range when Low was 1 and divided by zero when High was 0.
 */
VOID MatrixFillWithRandomNumbers(PMATRIX Matrix, INT Low, INT High) {
    if (!Matrix) {
        return;
    }
    if (High < Low) {
        INT Swap = Low;
        Low = High;
        High = Swap;
    }

    /* Width of the inclusive range, computed in 64 bits so High - Low cannot overflow INT. */
    const ULONGLONG Span = (ULONGLONG)((LONGLONG)High - (LONGLONG)Low) + 1;

    for (ULONG i = 0; i < Matrix->rows; i++) {
        for (ULONG j = 0; j < Matrix->columns; j++) {
            const LONGLONG Offset = (LONGLONG)(Random() % Span);
            Matrix->matrix[i][j] = (double)((LONGLONG)Low + Offset);
        }
    }
}
PMATRIX MatrixMultiply(PMATRIX MatrixOne, PMATRIX MatrixTwo) {
if (MatrixTwo->rows != MatrixOne->columns) { return NULL; }
PMATRIX New = MatrixNew(MatrixOne->rows, MatrixTwo->columns);
ULONG TotalThreads = MatrixOne->rows * MatrixTwo->columns;
HANDLE *ThreadHandles = (HANDLE *)RtlAllocateHeap(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(HANDLE) * TotalThreads);
ULONG counter = 0;
for (ULONG i = 0; i < MatrixOne->rows; i++) {
for (ULONG j = 0; j < MatrixTwo->columns; j++) {
PMATRIX_MULTIPLICATION_HANDOVER Handover = (PMATRIX_MULTIPLICATION_HANDOVER)RtlAllocateHeap(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(MATRIX_MULTIPLICATION_HANDOVER));
Handover->row = i;
Handover->column = j;
Handover->MatrixOne = MatrixOne;
Handover->MatrixTwo = MatrixTwo;
Handover->WorkingMatrix = New;
RtlCreateUserThread(NtCurrentProcess(),
NULL,
FALSE,
0,
0,
0,
(PUSER_THREAD_START_ROUTINE)MatrixMultiplicationWorker,
(LPVOID)Handover,
&ThreadHandles[counter++],
NULL);
}
}
NtWaitForMultipleObjects(TotalThreads, ThreadHandles, WaitAll, FALSE, NULL);
for (ULONG i = 0; i < TotalThreads; i++) {
NtClose(ThreadHandles[i]);
}
return New;
}
/*
 * Demo entry point: seeds the generator from the tick count, builds two 4x4
 * matrices filled with random values in [1, 10], prints them, multiplies them
 * and prints the product. Returns 0 on success and 1 if a matrix could not be
 * allocated or multiplied. The matrices are not freed explicitly; the process
 * exits immediately afterwards.
 */
INT main(VOID) {
    printf("Hello World!\n\n");
    RandomSetSeed(NtGetTickCount());

    PMATRIX MatrixOne = MatrixNew(4, 4);
    PMATRIX MatrixTwo = MatrixNew(4, 4);
    /* MatrixFillWithRandomNumbers dereferences its argument, so check first. */
    if (!MatrixOne || !MatrixTwo) {
        fprintf(stderr, "Failed to allocate input matrices\n");
        return 1;
    }

    MatrixFillWithRandomNumbers(MatrixOne, 1, 10);
    MatrixFillWithRandomNumbers(MatrixTwo, 1, 10);
    MatrixDump(MatrixOne);
    printf("\n");
    MatrixDump(MatrixTwo);
    printf("\n");

    PMATRIX Result = MatrixMultiply(MatrixOne, MatrixTwo);
    if (!Result) {
        fprintf(stderr, "Matrix multiplication failed\n");
        return 1;
    }
    MatrixDump(Result);
    return 0;
}
#pragma once
#include <winternl.h>
/*
 * Pointer to a thread start routine as passed to RtlCreateUserThread():
 * an NTAPI function taking one PVOID argument and returning nothing.
 */
typedef VOID (NTAPI *PUSER_THREAD_START_ROUTINE)(IN PVOID ApcArgument1);
/*
 * Wait mode passed as the WaitType argument of NtWaitForMultipleObjects().
 * The enumerators take the values 0 (WaitAll) and 1 (WaitAny).
 */
typedef enum _WAIT_TYPE {
WaitAll, /* presumably: wait until every handle is signaled - confirm against NT documentation */
WaitAny /* presumably: wait until any one handle is signaled - confirm against NT documentation */
} WAIT_TYPE;
NTSTATUS
RtlCreateUserThread(
IN HANDLE Process,
IN PSECURITY_DESCRIPTOR ThreadSecurityDescriptor,
IN BOOLEAN CreateSuspended,
IN ULONG_PTR ZeroBits,
IN SIZE_T MaximumStackSize,
IN SIZE_T CommittedStackSize,
IN PUSER_THREAD_START_ROUTINE StartAddress,
IN PVOID Parameter,
OUT PHANDLE Thread,
OUT PVOID ClientId
);
NTSTATUS
NtWaitForMultipleObjects (
IN ULONG Count,
IN HANDLE Handle[],
IN WAIT_TYPE WaitType,
IN BOOLEAN Alertable,
IN PLARGE_INTEGER Timeout OPTIONAL
);
NTSTATUS NtClose(IN HANDLE Handle);
BOOLEAN RtlFreeHeap(IN PVOID HeapHandle, IN ULONG Flags, IN PVOID HeapBase);
PVOID RtlAllocateHeap(IN PVOID HeapHandle, IN ULONG Flags, IN SIZE_T Size);
ULONG NtGetTickCount(VOID);
/* Expands to the constant handle value -1; passed as the Process argument of RtlCreateUserThread(). */
#define NtCurrentProcess() ((HANDLE)(LONG_PTR)-1)
#pragma once
// Mersenne Twister
// Based on mt19937ar.c found here:
// http://www.math.sci.hiroshima-u.ac.jp/m-mat/MT/MT2002/CODES/mt19937ar.c
/*
 * Generator parameters. With a 64-bit word and 312 state words these appear to
 * be the 64-bit Mersenne Twister (MT19937-64) constants rather than those of
 * the 32-bit mt19937ar.c - TODO confirm against the reference implementation.
 * NOTE(review): these are single-letter macros in a header, so they replace
 * any identifier with the same name in code that follows the #include.
 */
#define W 64 /* word size in bits; used as (W - 2) in the seeding shift */
#define N 312 /* number of words in the state array */
#define M 156 /* offset of the partner word mixed in by twist() */
#define R 31 /* number of low bits selected by MASK_LOW */
#define A 0xB5026F5AA96619E9ULL /* XORed into the shifted word in twist() when its low bit is set */
#define U 29 /* first tempering step: right shift amount */
#define D 0x5555555555555555ULL /* first tempering step: mask */
#define S 17 /* second tempering step: left shift amount */
#define B 0x71D67FFFEDA60000ULL /* second tempering step: mask */
#define T 37 /* third tempering step: left shift amount */
#define C 0xFFF7EEE000000000ULL /* third tempering step: mask */
#define L 43 /* final tempering step: right shift amount */
#define F 6364136223846793005 /* multiplier of the seeding recurrence in RandomSetSeed() */
#define MASK_LOW ((1ULL << R) - 1) /* low R bits set */
#define MASK_UPP (~MASK_LOW) /* all remaining upper bits set */
/* Generator state: N 64-bit words, filled by RandomSetSeed() and regenerated in place by twist(). */
static ULONGLONG state[N];
/*
 * Position of the next state word that Random() will output.
 * N means the state must be twisted before the next output; the initial
 * value N + 1 marks a generator that has never been seeded.
 */
static ULONG index = N + 1;
/*
 * Regenerates all N state words in place and rewinds the output position.
 * Each word is replaced by its partner M places ahead (wrapping around),
 * XORed with a value built from the upper bits of the current word and the
 * low bits of its successor, shifted right by one and conditionally XORed
 * with A.
 */
static void twist(void) {
    ULONG pos = 0;
    while (pos < N) {
        const ULONG successor = (pos + 1) % N;
        const ULONG partner = (pos + M) % N;

        const ULONGLONG merged = (state[pos] & MASK_UPP) + (state[successor] & MASK_LOW);
        const ULONGLONG mixed = (merged & 1) ? ((merged >> 1) ^ A) : (merged >> 1);

        state[pos] = state[partner] ^ mixed;
        ++pos;
    }
    index = 0;
}
/*
 * Seeds the generator. state[0] takes the seed itself and every following
 * word is derived from its predecessor by the recurrence
 *     F * (prev ^ (prev >> (W - 2))) + position.
 * The output position is set to N so that the next Random() call twists the
 * state before producing a value.
 */
void RandomSetSeed(ULONGLONG seed) {
    ULONGLONG previous = seed;
    state[0] = previous;

    for (ULONG slot = 1; slot < N; slot++) {
        previous = F * (previous ^ (previous >> (W - 2))) + slot;
        state[slot] = previous;
    }

    index = N;
}
/*
 * Returns the next 64-bit pseudo-random value.
 *
 * When the current block of state words is exhausted the state is twisted
 * first. If the generator was never seeded it is seeded with the default
 * value 5489 through RandomSetSeed(); the earlier call to the C library's
 * srand() did not touch `state`, which left an unseeded generator returning
 * 0 forever.
 */
ULONGLONG Random(void) {
    if (index >= N) {
        if (index > N) {
            /* Never seeded: fall back to the default seed. */
            RandomSetSeed(5489);
        }
        twist();
    }

    /* Tempering: scramble the raw state word before handing it out. */
    ULONGLONG y = state[index];
    y ^= (y >> U) & D;
    y ^= (y << S) & B;
    y ^= (y << T) & C;
    y ^= y >> L;

    ++index;
    return y;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment