Skip to content

Instantly share code, notes, and snippets.

@Yey007
Created July 27, 2020 17:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Yey007/4a69ddff67e4b7eb6d099233678eb830 to your computer and use it in GitHub Desktop.
Save Yey007/4a69ddff67e4b7eb6d099233678eb830 to your computer and use it in GitHub Desktop.
/// <summary>
/// Adds two matrices of identical size on the accelerator by launching
/// <c>AddSharedKernel</c> with a dynamic shared-memory configuration, and
/// returns the sum as a new matrix.
/// </summary>
/// <param name="one">First operand. Must not be null.</param>
/// <param name="two">Second operand. Must not be null and must report the
/// same <c>GetSize(0)</c> and <c>GetSize(1)</c> as <paramref name="one"/>.</param>
/// <returns>A new <see cref="BufferedFastMatrix{T}"/> constructed from the
/// array read back out of the result buffer.</returns>
/// <exception cref="ArgumentNullException">Either operand is null.</exception>
/// <exception cref="BadDimensionException">The operands differ in size along
/// dimension 0 or dimension 1.</exception>
public BufferedFastMatrix<T> AddShared(BufferedFastMatrix<T> one, BufferedFastMatrix<T> two)
{
    // `is null` cannot be redirected by a user-defined == operator on the matrix type.
    if (one is null)
    {
        throw new ArgumentNullException(nameof(one));
    }
    if (two is null)
    {
        throw new ArgumentNullException(nameof(two));
    }

    var sizeDim0 = one.GetSize(0);
    var sizeDim1 = one.GetSize(1);
    if (sizeDim0 != two.GetSize(0) || sizeDim1 != two.GetSize(1))
    {
        throw new BadDimensionException(sizeDim0, sizeDim1, two.GetSize(0), two.GetSize(1));
    }

    // Start both uploads before allocating the output buffer (same order as before).
    one.CopyToGPU();
    two.CopyToGPU();

    // The result buffer lives on the accelerator and is only needed until its
    // contents have been read back, so release it deterministically — also when
    // the kernel launch or the read-back throws.
    using (MemoryBuffer2D<T> resultBuffer = accelerator.Allocate<T>(sizeDim0, sizeDim1))
    {
        // Request room for both operands plus the result in dynamic shared memory.
        // NOTE(review): GetSize(-1) is presumably the total element count — confirm.
        var config = SharedMemoryConfig.RequestDynamic<T>(
            one.GetSize(-1) + two.GetSize(-1) + resultBuffer.Length);

        // The original author noted that the first wait is currently not required
        // and is kept only until a better solution is in place.
        one.WaitForCopy();
        two.WaitForCopy();

        var groupSize = accelerator.MaxNumThreadsPerGroup;

        // NOTE(review): the grid size is hard-coded to 2 groups; the original
        // comment suggests (resultBuffer.Length + groupSize - 1) / groupSize
        // was being considered instead.
        var kernelConfig = (2, groupSize, config);
        AddSharedKernel(kernelConfig, one.buffer.View, two.buffer.View, resultBuffer.View);

        // Wait for the kernel before reading the result back to the host.
        accelerator.Synchronize();
        var tempArray = resultBuffer.GetAs2DArray();
        accelerator.Synchronize();

        return new BufferedFastMatrix<T>(tempArray);
    }
}
/// <summary>
/// Kernel body: stages elements of <paramref name="aView"/> and
/// <paramref name="bView"/> in dynamic shared memory, adds them with
/// <c>T.Add</c>, and writes the sum for this thread's global index into
/// <paramref name="resView"/>.
/// </summary>
/// <param name="aView">View of the first operand.</param>
/// <param name="bView">View of the second operand.</param>
/// <param name="resView">View that receives the result.</param>
/// <remarks>
/// The dynamic shared-memory block is split into three consecutive regions:
/// <c>[0, aView.Length)</c> for a, <c>[aView.Length, aView.Length + bView.Length)</c>
/// for b, and the following <c>resView.Length</c> elements for the result.
/// The launch must therefore provide at least
/// <c>aView.Length + bView.Length + resView.Length</c> shared elements.
/// </remarks>
private static void GPUAddShared(ArrayView2D<T> aView, ArrayView2D<T> bView,
    ArrayView2D<T> resView)
{
    var stride = GridExtensions.GridStrideLoopStride;
    var globalIndex = Grid.GlobalIndex.XY;
    var localIndex = Group.Index.XY;

    var sharedMem = SharedMemory.GetDynamic<T>();
    var aShared = sharedMem.GetSubView(0, aView.Length);
    var bShared = sharedMem.GetSubView(aView.Length, bView.Length);
    // The result region starts after BOTH operand regions. The previous offset
    // (bView.Length) made it overlap bShared, so storing a sum clobbered the
    // staged b operand.
    var resShared = sharedMem.GetSubView(aView.Length + bView.Length, resView.Length);

    var aLinear = localIndex.ComputeLinearIndex(aView.Extent);
    var bLinear = localIndex.ComputeLinearIndex(bView.Extent);
    var resLinear = localIndex.ComputeLinearIndex(resView.Extent);

    // NOTE(review): every iteration of these loops reads the same globalIndex
    // element; confirm whether the strided slots are meant to receive
    // different source elements.
    for (var a = aLinear; a < aView.Length; a += stride)
    {
        aShared[a] = aView[globalIndex];
    }

    // Each loop now uses its own start index and bound (previously all three
    // used aLinear / aView.Length); identical whenever the extents match.
    for (var b = bLinear; b < bView.Length; b += stride)
    {
        bShared[b] = bView[globalIndex];
    }

    for (var r = resLinear; r < resView.Length; r += stride)
    {
        resShared[r] = aShared[aLinear].Add(bShared[bLinear]);
    }

    // Wait for all threads in the group before reading the result region.
    Group.Barrier();

    // NOTE(review): there is no bounds guard here; threads whose index lies
    // outside resView would access out of range — confirm the launch
    // dimensions rule this out.
    resView[globalIndex] = resShared[resLinear];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment