Skip to content

Instantly share code, notes, and snippets.

@strikeraryu
Last active April 9, 2021 07:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save strikeraryu/81e43c4ec6c684616b52e3566d60daf4 to your computer and use it in GitHub Desktop.
Save strikeraryu/81e43c4ec6c684616b52e3566d60daf4 to your computer and use it in GitHub Desktop.
Strassen's Matrix Multipication algorithm on 2d array implemented in chapel, further parallism is used. Implementaion have to improved as we go deeper in the recursion tree the number of the parallel task will increase exponentially.
use Map;
// All logs are printed at last
// map to store logs of local
var taskMap : map(int, int);
var maxTaskMap : map(int, int);
/**
matrix subtraction
arguments
A [?domA] -> first array
B [?domB] -> second array
returns [domA] -> result array after subtraction
**/
proc sub(A : [?domA] int, B : [?domB] int) : [] int{
var C : [0..(domA.shape(0)-1), 0..(domA.shape(0)-1)] int;
var dom_C = C.domain;
for (ia, ib, ic) in zip(domA.dim(0), domB.dim(0), dom_C.dim(0)){
for (ja, jb, jc) in zip(domA.dim(1), domB.dim(1), dom_C.dim(1)){
C[(ic, jc)] = A[(ia, ja)] - B[(ib, jb)];
}
}
return C;
}
/**
matrix addition
arguments
A [?domA] -> first array
B [?domB] -> second array
returns [domA] -> result array after addition
**/
proc add(A : [?domA] int, B : [?domB] int) : [] int{
var C : [0..(domA.shape(0)-1), 0..(domA.shape(0)-1)] int;
var dom_C = C.domain;
for (ia, ib, ic) in zip(domA.dim(0), domB.dim(0), dom_C.dim(0)){
for (ja, jb, jc) in zip(domA.dim(1), domB.dim(1), dom_C.dim(1)){
C[(ic, jc)] = A[(ia, ja)] + B[(ib, jb)];
}
}
return C;
}
/**
rescursive Function for strassen's matrix multipication
arguments
A [?domA] -> first array
B [?domB] -> second array
currLoc int -> local number where the task should run
returns [domA] -> result array after multipication
**/
proc strassen(A : [?domA] int, B : [?domB] int, currLoc : int = 0) : [domA] int{
// creating locales log
taskMap[here.id] += 1;
maxTaskMap[here.id] = max(maxTaskMap[here.id], here.runningTasks());
// finding all bounds
var blA0 : int = domA.dim(0).low;
var bhA0 : int = domA.dim(0).high;
var bmA0 : int = (domA.dim(0).high + domA.dim(0).low)/2+1;
var blB0 : int = domB.dim(0).low;
var bhB0 : int = domB.dim(0).high;
var bmB0 : int = (domB.dim(0).high + domB.dim(0).low)/2+1;
var blA1 : int = domA.dim(1).low;
var bhA1 : int = domA.dim(1).high;
var bmA1 : int = (domA.dim(1).high + domA.dim(1).low)/2+1;
var blB1 : int = domB.dim(1).low;
var bhB1 : int = domB.dim(1).high;
var bmB1 : int = (domB.dim(1).high + domB.dim(1).low)/2+1;
// base case
if(A.size == 1){
return A*B;
}
// spliting both array into four part
var a = A[blA0..(bmA0-1), blA1..(bmA1-1)];
var b = A[blA0..(bmA0-1), bmA1..bhA1];
var c = A[bmA0..bhA0, blA1..(bmA1-1)];
var d = A[bmA0..bhA0, bmA1..bhA1];
var e = B[blB0..(bmB0-1), blB1..(bmB1-1)];
var f = B[blB0..(bmB0-1), bmB1..bhB1];
var g = B[bmB0..bhB0, blB1..(bmB1-1)];
var h = B[bmB0..bhB0, bmB1..bhB1];
var p1, p2, p3, p4, p5, p6, p7 : [blA0..(bmA0-1), blA1..(bmA1-1)] int;
// running on the designated local
on Locales[currLoc] {
// running tasks serial if it exceed total number of PUs in a local
serial here.runningTasks() + 7 > here.numPUs() {
// running task parallel and giving different local for different task ((currLoc + taskNo)%total Locales)
cobegin {
p1 = strassen(a, sub(f, h), (currLoc+1)%numLocales);
p2 = strassen(add(a, b), h, (currLoc+2)%numLocales);
p3 = strassen(add(c, d), e, (currLoc+3)%numLocales);
p4 = strassen(d, sub(g, e), (currLoc+4)%numLocales);
p5 = strassen(add(a, d), add(e, h), (currLoc+5)%numLocales);
p6 = strassen(sub(b, d), add(g, h), (currLoc+6)%numLocales);
p7 = strassen(sub(a, c), add(e, f), (currLoc+7)%numLocales);
}
}
}
// finding four part for the final Matrix
var c11 = add(sub(add(p5, p4), p2), p6);
var c12 = add(p1, p2);
var c21 = add(p3, p4);
var c22 = sub(sub(add(p1, p5), p3), p7);
// joining four parts in one
var C : [blB0..bhB0, blB1..bhB1] int;
C[blB0..(bmB0-1), blB1..(bmB1-1)] = c11;
C[blB0..(bmB0 - 1), bmB1..bhB1] = c12;
C[bmB0..bhB0, blB1..(bmB1 - 1)] = c21;
C[bmB0..bhB0, bmB1..bhB1] = c22;
return C;
}
var dom = {0..3, 0..3};
var A : [dom] int;
var B : [dom] int;
// Matrix Input
A[0, 0] = 5;
A[0, 1] = 1;
A[0, 2] = 12;
A[0, 3] = 10;
A[1, 0] = 2;
A[1, 1] = 3;
A[1, 2] = 3;
A[1, 3] = 8;
A[2, 0] = 8;
A[2, 1] = 17;
A[2, 2] = 2;
A[2, 3] = 3;
A[3, 0] = 3;
A[3, 1] = 3;
A[3, 2] = 4;
A[3, 3] = 7;
B[0, 0] = 3;
B[0, 1] = 30;
B[0, 2] = 12;
B[0, 3] = 18;
B[1, 0] = 12;
B[1, 1] = 1;
B[1, 2] = 23;
B[1, 3] = 9;
B[2, 0] = 5;
B[2, 1] = 10;
B[2, 2] = 12;
B[2, 3] = 2;
B[3, 0] = 3;
B[3, 1] = 1;
B[3, 2] = 4;
B[3, 3] = 10;
// print Results
writeln(strassen(A, B));
// print locales logs
writeln(taskMap);
writeln(maxTaskMap);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment