Last active
April 9, 2021 07:54
-
-
Save strikeraryu/81e43c4ec6c684616b52e3566d60daf4 to your computer and use it in GitHub Desktop.
Strassen's Matrix Multipication algorithm on 2d array implemented in chapel, further parallism is used. Implementaion have to improved as we go deeper in the recursion tree the number of the parallel task will increase exponentially.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use Map; | |
// All logs are printed at last | |
// map to store logs of local | |
var taskMap : map(int, int); | |
var maxTaskMap : map(int, int); | |
/** | |
matrix subtraction | |
arguments | |
A [?domA] -> first array | |
B [?domB] -> second array | |
returns [domA] -> result array after subtraction | |
**/ | |
proc sub(A : [?domA] int, B : [?domB] int) : [] int{ | |
var C : [0..(domA.shape(0)-1), 0..(domA.shape(0)-1)] int; | |
var dom_C = C.domain; | |
for (ia, ib, ic) in zip(domA.dim(0), domB.dim(0), dom_C.dim(0)){ | |
for (ja, jb, jc) in zip(domA.dim(1), domB.dim(1), dom_C.dim(1)){ | |
C[(ic, jc)] = A[(ia, ja)] - B[(ib, jb)]; | |
} | |
} | |
return C; | |
} | |
/** | |
matrix addition | |
arguments | |
A [?domA] -> first array | |
B [?domB] -> second array | |
returns [domA] -> result array after addition | |
**/ | |
proc add(A : [?domA] int, B : [?domB] int) : [] int{ | |
var C : [0..(domA.shape(0)-1), 0..(domA.shape(0)-1)] int; | |
var dom_C = C.domain; | |
for (ia, ib, ic) in zip(domA.dim(0), domB.dim(0), dom_C.dim(0)){ | |
for (ja, jb, jc) in zip(domA.dim(1), domB.dim(1), dom_C.dim(1)){ | |
C[(ic, jc)] = A[(ia, ja)] + B[(ib, jb)]; | |
} | |
} | |
return C; | |
} | |
/** | |
rescursive Function for strassen's matrix multipication | |
arguments | |
A [?domA] -> first array | |
B [?domB] -> second array | |
currLoc int -> local number where the task should run | |
returns [domA] -> result array after multipication | |
**/ | |
proc strassen(A : [?domA] int, B : [?domB] int, currLoc : int = 0) : [domA] int{ | |
// creating locales log | |
taskMap[here.id] += 1; | |
maxTaskMap[here.id] = max(maxTaskMap[here.id], here.runningTasks()); | |
// finding all bounds | |
var blA0 : int = domA.dim(0).low; | |
var bhA0 : int = domA.dim(0).high; | |
var bmA0 : int = (domA.dim(0).high + domA.dim(0).low)/2+1; | |
var blB0 : int = domB.dim(0).low; | |
var bhB0 : int = domB.dim(0).high; | |
var bmB0 : int = (domB.dim(0).high + domB.dim(0).low)/2+1; | |
var blA1 : int = domA.dim(1).low; | |
var bhA1 : int = domA.dim(1).high; | |
var bmA1 : int = (domA.dim(1).high + domA.dim(1).low)/2+1; | |
var blB1 : int = domB.dim(1).low; | |
var bhB1 : int = domB.dim(1).high; | |
var bmB1 : int = (domB.dim(1).high + domB.dim(1).low)/2+1; | |
// base case | |
if(A.size == 1){ | |
return A*B; | |
} | |
// spliting both array into four part | |
var a = A[blA0..(bmA0-1), blA1..(bmA1-1)]; | |
var b = A[blA0..(bmA0-1), bmA1..bhA1]; | |
var c = A[bmA0..bhA0, blA1..(bmA1-1)]; | |
var d = A[bmA0..bhA0, bmA1..bhA1]; | |
var e = B[blB0..(bmB0-1), blB1..(bmB1-1)]; | |
var f = B[blB0..(bmB0-1), bmB1..bhB1]; | |
var g = B[bmB0..bhB0, blB1..(bmB1-1)]; | |
var h = B[bmB0..bhB0, bmB1..bhB1]; | |
var p1, p2, p3, p4, p5, p6, p7 : [blA0..(bmA0-1), blA1..(bmA1-1)] int; | |
// running on the designated local | |
on Locales[currLoc] { | |
// running tasks serial if it exceed total number of PUs in a local | |
serial here.runningTasks() + 7 > here.numPUs() { | |
// running task parallel and giving different local for different task ((currLoc + taskNo)%total Locales) | |
cobegin { | |
p1 = strassen(a, sub(f, h), (currLoc+1)%numLocales); | |
p2 = strassen(add(a, b), h, (currLoc+2)%numLocales); | |
p3 = strassen(add(c, d), e, (currLoc+3)%numLocales); | |
p4 = strassen(d, sub(g, e), (currLoc+4)%numLocales); | |
p5 = strassen(add(a, d), add(e, h), (currLoc+5)%numLocales); | |
p6 = strassen(sub(b, d), add(g, h), (currLoc+6)%numLocales); | |
p7 = strassen(sub(a, c), add(e, f), (currLoc+7)%numLocales); | |
} | |
} | |
} | |
// finding four part for the final Matrix | |
var c11 = add(sub(add(p5, p4), p2), p6); | |
var c12 = add(p1, p2); | |
var c21 = add(p3, p4); | |
var c22 = sub(sub(add(p1, p5), p3), p7); | |
// joining four parts in one | |
var C : [blB0..bhB0, blB1..bhB1] int; | |
C[blB0..(bmB0-1), blB1..(bmB1-1)] = c11; | |
C[blB0..(bmB0 - 1), bmB1..bhB1] = c12; | |
C[bmB0..bhB0, blB1..(bmB1 - 1)] = c21; | |
C[bmB0..bhB0, bmB1..bhB1] = c22; | |
return C; | |
} | |
var dom = {0..3, 0..3}; | |
var A : [dom] int; | |
var B : [dom] int; | |
// Matrix Input | |
A[0, 0] = 5; | |
A[0, 1] = 1; | |
A[0, 2] = 12; | |
A[0, 3] = 10; | |
A[1, 0] = 2; | |
A[1, 1] = 3; | |
A[1, 2] = 3; | |
A[1, 3] = 8; | |
A[2, 0] = 8; | |
A[2, 1] = 17; | |
A[2, 2] = 2; | |
A[2, 3] = 3; | |
A[3, 0] = 3; | |
A[3, 1] = 3; | |
A[3, 2] = 4; | |
A[3, 3] = 7; | |
B[0, 0] = 3; | |
B[0, 1] = 30; | |
B[0, 2] = 12; | |
B[0, 3] = 18; | |
B[1, 0] = 12; | |
B[1, 1] = 1; | |
B[1, 2] = 23; | |
B[1, 3] = 9; | |
B[2, 0] = 5; | |
B[2, 1] = 10; | |
B[2, 2] = 12; | |
B[2, 3] = 2; | |
B[3, 0] = 3; | |
B[3, 1] = 1; | |
B[3, 2] = 4; | |
B[3, 3] = 10; | |
// print Results | |
writeln(strassen(A, B)); | |
// print locales logs | |
writeln(taskMap); | |
writeln(maxTaskMap); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment