Created
July 27, 2020 17:14
-
-
Save Yey007/4a69ddff67e4b7eb6d099233678eb830 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Adds two matrices element-wise on the accelerator through the shared-memory
/// kernel and returns the sum as a new matrix.
/// </summary>
/// <param name="one">First operand; must not be null.</param>
/// <param name="two">
/// Second operand; must not be null and must have the same size as
/// <paramref name="one"/> along dimensions 0 and 1.
/// </param>
/// <returns>A new matrix built from the contents of the kernel's result buffer.</returns>
/// <exception cref="ArgumentNullException">Either operand is null.</exception>
/// <exception cref="BadDimensionException">
/// The operands differ in size along dimension 0 or dimension 1.
/// </exception>
public BufferedFastMatrix<T> AddShared(BufferedFastMatrix<T> one, BufferedFastMatrix<T> two)
{
    // Name the offending argument so callers can tell which operand was null.
    if (one == null)
    {
        throw new ArgumentNullException(nameof(one));
    }
    if (two == null)
    {
        throw new ArgumentNullException(nameof(two));
    }

    var oneSize0 = one.GetSize(0);
    var oneSize1 = one.GetSize(1);
    var twoSize0 = two.GetSize(0);
    var twoSize1 = two.GetSize(1);
    if (oneSize0 != twoSize0 || oneSize1 != twoSize1)
    {
        throw new BadDimensionException(oneSize0, oneSize1, twoSize0, twoSize1);
    }

    // Start both uploads before allocating the result so the copies can be in flight.
    one.CopyToGPU();
    two.CopyToGPU();

    // The result buffer is only needed until its contents are read back to the host;
    // disposing it here (also on exceptions) avoids leaking device memory per call.
    using (MemoryBuffer2D<T> resultBuffer = accelerator.Allocate<T>(oneSize0, oneSize1))
    {
        // Dynamic shared memory request covering both inputs plus the result.
        // NOTE(review): presumably GetSize(-1) returns the total element count — confirm.
        var config = SharedMemoryConfig.RequestDynamic<T>(
            one.GetSize(-1) + two.GetSize(-1) + resultBuffer.Length);

        // Per the original author these waits are currently not required; they are
        // kept until a better synchronization scheme replaces them.
        one.WaitForCopy();
        two.WaitForCopy();

        var groupSize = accelerator.MaxNumThreadsPerGroup;
        // NOTE(review): the grid is fixed at 2 groups; the original author considered
        // (resultBuffer.Length + groupSize - 1) / groupSize instead. With a fixed grid,
        // full coverage depends on the kernel striding over the remaining elements.
        var kernelConfig = (2, groupSize, config);
        AddSharedKernel(kernelConfig, one.buffer.View, two.buffer.View, resultBuffer.View);

        // Wait for the kernel to finish before reading the result back.
        accelerator.Synchronize();
        var tempArray = resultBuffer.GetAs2DArray();
        // NOTE(review): likely redundant if GetAs2DArray already blocks until the copy
        // completes — confirm before removing.
        accelerator.Synchronize();

        return new BufferedFastMatrix<T>(tempArray);
    }
}
/// <summary>
/// Kernel body: computes <c>resView = aView + bView</c> element-wise, staging the
/// operands and the sum in dynamic shared memory laid out as [a | b | result].
/// </summary>
/// <param name="aView">First input view.</param>
/// <param name="bView">Second input view.</param>
/// <param name="resView">Output view that receives the element-wise sum.</param>
/// <remarks>
/// Each thread starts at its global X index and advances by
/// <c>GridExtensions.GridStrideLoopStride</c>, so every element is processed even
/// when fewer threads than elements are launched.
/// NOTE(review): assumes a 1D launch, that all three views have the same extent, and
/// that the launch requested at least aView.Length + bView.Length + resView.Length
/// elements of dynamic shared memory — confirm against the caller.
/// </remarks>
private static void GPUAddShared(ArrayView2D<T> aView, ArrayView2D<T> bView,
    ArrayView2D<T> resView)
{
    var stride = GridExtensions.GridStrideLoopStride;
    var firstElement = Grid.GlobalIndex.X;

    // Carve the dynamic shared memory into three consecutive, non-overlapping regions.
    var sharedMem = SharedMemory.GetDynamic<T>();
    var aShared = sharedMem.GetSubView(0, aView.Length);
    var bShared = sharedMem.GetSubView(aView.Length, bView.Length);
    // The result region starts after BOTH inputs; starting it at bView.Length made it
    // alias bShared whenever the inputs have equal length.
    var resShared = sharedMem.GetSubView(aView.Length + bView.Length, resView.Length);

    // Stage the operands. The loop variable is the linear element index, which is
    // mapped back to a 2D index through the view's extent. The loop bound also keeps
    // threads beyond the last element from touching memory.
    for (var a = firstElement; a < aView.Length; a += stride)
    {
        aShared[a] = aView[aView.Extent.ReconstructIndex(a)];
    }
    for (var b = firstElement; b < bView.Length; b += stride)
    {
        bShared[b] = bView[bView.Extent.ReconstructIndex(b)];
    }

    // Each thread only combines slots it staged itself, so no barrier is needed
    // between staging and summing.
    for (var r = firstElement; r < resView.Length; r += stride)
    {
        resShared[r] = aShared[r].Add(bShared[r]);
    }

    // Kept from the original; it sits outside the loops so every thread in the group
    // reaches it exactly once.
    Group.Barrier();

    for (var r = firstElement; r < resView.Length; r += stride)
    {
        resView[resView.Extent.ReconstructIndex(r)] = resShared[r];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment