Skip to content

Instantly share code, notes, and snippets.

@dtysky
Last active June 10, 2019 11:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dtysky/b1b0ea3282225a3f39397d30d78a2f4d to your computer and use it in GitHub Desktop.
Save dtysky/b1b0ea3282225a3f39397d30d78a2f4d to your computer and use it in GitHub Desktop.
mat4-mul-wasm
;; 4x4 f32 matrix product over linear memory.
;; All three i32 parameters are used as base addresses: $1 and $2 are only
;; read (f32.load), $0 is only written (f32.store). With 4-byte elements and
;; r, c in 0..3, the function computes
;;   mem[$0 + 16*r + 4*c] = sum over k in 0..3 of
;;                          mem[$1 + 16*k + 4*c] * mem[$2 + 16*r + 4*k]
;; Stores are issued in ascending offset order (0, 4, 8, ..., 60).
;; NOTE(review): presumably the compiled form of the C `multiply` shown later
;; in this file - confirm against the build that produced it.
(func $0 (; 0 ;) (; has Stack IR ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32)
(local $3 f32)
(local $4 f32)
(local $5 f32)
(local $6 f32)
(local $7 f32)
(local $8 f32)
(local $9 f32)
(local $10 f32)
(local $11 f32)
(local $12 f32)
(local $13 f32)
(local $14 f32)
(local $15 f32)
(local $16 f32)
(local $17 f32)
(local $18 f32)
(local $19 f32)
(local $20 f32)
(local $21 f32)
(local $22 f32)
;; Preload twelve of the sixteen f32 values behind $1 into $9..$20
;; (byte offsets 4, 8, 12, 20, 24, 28, 36, 40, 44, 52, 56, 60).
(set_local $9
(f32.load offset=4
(get_local $1)
)
)
(set_local $10
(f32.load offset=8
(get_local $1)
)
)
(set_local $11
(f32.load offset=12
(get_local $1)
)
)
(set_local $12
(f32.load offset=20
(get_local $1)
)
)
(set_local $13
(f32.load offset=24
(get_local $1)
)
)
(set_local $14
(f32.load offset=28
(get_local $1)
)
)
(set_local $15
(f32.load offset=36
(get_local $1)
)
)
(set_local $16
(f32.load offset=40
(get_local $1)
)
)
(set_local $17
(f32.load offset=44
(get_local $1)
)
)
(set_local $18
(f32.load offset=52
(get_local $1)
)
)
(set_local $19
(f32.load offset=56
(get_local $1)
)
)
(set_local $20
(f32.load offset=60
(get_local $1)
)
)
;; Group 1, store at $0+0. The remaining four $1 values (offsets 0, 16, 32,
;; 48) are loaded here and kept in $7, $8, $21, $22 via tee_local; $3..$6
;; receive the f32 values at $2+0, +4, +8, +12.
(f32.store
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(tee_local $7
(f32.load
(get_local $1)
)
)
(tee_local $3
(f32.load
(get_local $2)
)
)
)
(f32.mul
(tee_local $8
(f32.load offset=16
(get_local $1)
)
)
(tee_local $4
(f32.load offset=4
(get_local $2)
)
)
)
)
(f32.mul
(tee_local $21
(f32.load offset=32
(get_local $1)
)
)
(tee_local $5
(f32.load offset=8
(get_local $2)
)
)
)
)
(f32.mul
(tee_local $22
(f32.load offset=48
(get_local $1)
)
)
(tee_local $6
(f32.load offset=12
(get_local $2)
)
)
)
)
)
;; Group 1, stores at $0+4, +8, +12: reuse $3..$6 with the preloaded $1 values.
(f32.store offset=4
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $9)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $4)
)
)
(f32.mul
(get_local $15)
(get_local $5)
)
)
(f32.mul
(get_local $18)
(get_local $6)
)
)
)
(f32.store offset=8
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $10)
(get_local $3)
)
(f32.mul
(get_local $13)
(get_local $4)
)
)
(f32.mul
(get_local $16)
(get_local $5)
)
)
(f32.mul
(get_local $19)
(get_local $6)
)
)
)
(f32.store offset=12
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $14)
(get_local $4)
)
)
(f32.mul
(get_local $17)
(get_local $5)
)
)
(f32.mul
(get_local $20)
(get_local $6)
)
)
)
;; Group 2, stores at $0+16..28: $3..$6 are reloaded from $2+16, +20, +24, +28.
(f32.store offset=16
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $7)
(tee_local $3
(f32.load offset=16
(get_local $2)
)
)
)
(f32.mul
(get_local $8)
(tee_local $4
(f32.load offset=20
(get_local $2)
)
)
)
)
(f32.mul
(get_local $21)
(tee_local $5
(f32.load offset=24
(get_local $2)
)
)
)
)
(f32.mul
(get_local $22)
(tee_local $6
(f32.load offset=28
(get_local $2)
)
)
)
)
)
(f32.store offset=20
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $9)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $4)
)
)
(f32.mul
(get_local $15)
(get_local $5)
)
)
(f32.mul
(get_local $18)
(get_local $6)
)
)
)
(f32.store offset=24
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $10)
(get_local $3)
)
(f32.mul
(get_local $13)
(get_local $4)
)
)
(f32.mul
(get_local $16)
(get_local $5)
)
)
(f32.mul
(get_local $19)
(get_local $6)
)
)
)
(f32.store offset=28
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $14)
(get_local $4)
)
)
(f32.mul
(get_local $17)
(get_local $5)
)
)
(f32.mul
(get_local $20)
(get_local $6)
)
)
)
;; Group 3, stores at $0+32..44: $3..$6 are reloaded from $2+32, +36, +40, +44.
(f32.store offset=32
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $7)
(tee_local $3
(f32.load offset=32
(get_local $2)
)
)
)
(f32.mul
(get_local $8)
(tee_local $4
(f32.load offset=36
(get_local $2)
)
)
)
)
(f32.mul
(get_local $21)
(tee_local $5
(f32.load offset=40
(get_local $2)
)
)
)
)
(f32.mul
(get_local $22)
(tee_local $6
(f32.load offset=44
(get_local $2)
)
)
)
)
)
(f32.store offset=36
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $9)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $4)
)
)
(f32.mul
(get_local $15)
(get_local $5)
)
)
(f32.mul
(get_local $18)
(get_local $6)
)
)
)
(f32.store offset=40
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $10)
(get_local $3)
)
(f32.mul
(get_local $13)
(get_local $4)
)
)
(f32.mul
(get_local $16)
(get_local $5)
)
)
(f32.mul
(get_local $19)
(get_local $6)
)
)
)
(f32.store offset=44
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $14)
(get_local $4)
)
)
(f32.mul
(get_local $17)
(get_local $5)
)
)
(f32.mul
(get_local $20)
(get_local $6)
)
)
)
;; Group 4, stores at $0+48..60. $7 and $8 are read one last time as $1
;; values and are then overwritten by tee_local with the f32 values at $2+48
;; and $2+56; $3 and $4 take $2+52 and $2+60. The three stores that follow
;; therefore pair ($7, $3, $8, $4) with the preloaded $1 values.
(f32.store offset=48
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $7)
(tee_local $7
(f32.load offset=48
(get_local $2)
)
)
)
(f32.mul
(get_local $8)
(tee_local $3
(f32.load offset=52
(get_local $2)
)
)
)
)
(f32.mul
(get_local $21)
(tee_local $8
(f32.load offset=56
(get_local $2)
)
)
)
)
(f32.mul
(get_local $22)
(tee_local $4
(f32.load offset=60
(get_local $2)
)
)
)
)
)
(f32.store offset=52
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $9)
(get_local $7)
)
(f32.mul
(get_local $12)
(get_local $3)
)
)
(f32.mul
(get_local $15)
(get_local $8)
)
)
(f32.mul
(get_local $18)
(get_local $4)
)
)
)
(f32.store offset=56
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $10)
(get_local $7)
)
(f32.mul
(get_local $13)
(get_local $3)
)
)
(f32.mul
(get_local $16)
(get_local $8)
)
)
(f32.mul
(get_local $19)
(get_local $4)
)
)
)
(f32.store offset=60
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $7)
)
(f32.mul
(get_local $14)
(get_local $3)
)
)
(f32.mul
(get_local $17)
(get_local $8)
)
)
(f32.mul
(get_local $20)
(get_local $4)
)
)
)
)
;; 4x4 f32 matrix product over linear memory; the symbol name identifies it as
;; gl_matrix_wasm::matrix4::Matrix4::multiply.
;; All three i32 parameters are used as base addresses: $1 and $2 are only
;; read (f32.load), $0 is only written (f32.store). With 4-byte elements and
;; r, c in 0..3, the function computes
;;   mem[$0 + 16*r + 4*c] = sum over k in 0..3 of
;;                          mem[$1 + 16*k + 4*c] * mem[$2 + 16*r + 4*k]
;; Within each group of four results the stores are issued in descending
;; offset order (12, 8, 4, 0, then 28, 24, 20, 16, and so on).
(func $gl_matrix_wasm::matrix4::Matrix4::multiply::h990fc61ad8f18638 (; 17 ;) (; has Stack IR ;) (type $11) (param $0 i32) (param $1 i32) (param $2 i32)
(local $3 f32)
(local $4 f32)
(local $5 f32)
(local $6 f32)
(local $7 f32)
(local $8 f32)
(local $9 f32)
(local $10 f32)
(local $11 f32)
(local $12 f32)
(local $13 f32)
(local $14 f32)
(local $15 f32)
(local $16 f32)
(local $17 f32)
(local $18 f32)
(local $19 f32)
(local $20 f32)
(local $21 f32)
(local $22 f32)
;; Preload twelve of the sixteen f32 values behind $1 into $9..$20
;; (byte offsets 48, 32, 0, 16, 52, 36, 4, 20, 56, 40, 8, 24, in that order).
(set_local $9
(f32.load offset=48
(get_local $1)
)
)
(set_local $10
(f32.load offset=32
(get_local $1)
)
)
(set_local $11
(f32.load
(get_local $1)
)
)
(set_local $12
(f32.load offset=16
(get_local $1)
)
)
(set_local $13
(f32.load offset=52
(get_local $1)
)
)
(set_local $14
(f32.load offset=36
(get_local $1)
)
)
(set_local $15
(f32.load offset=4
(get_local $1)
)
)
(set_local $16
(f32.load offset=20
(get_local $1)
)
)
(set_local $17
(f32.load offset=56
(get_local $1)
)
)
(set_local $18
(f32.load offset=40
(get_local $1)
)
)
(set_local $19
(f32.load offset=8
(get_local $1)
)
)
(set_local $20
(f32.load offset=24
(get_local $1)
)
)
;; Group 1, store at $0+12. The remaining four $1 values (offsets 12, 28, 44,
;; 60) are loaded here and kept in $7, $8, $21, $22 via tee_local; $3..$6
;; receive the f32 values at $2+0, +4, +8, +12.
(f32.store offset=12
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(tee_local $7
(f32.load offset=12
(get_local $1)
)
)
(tee_local $3
(f32.load
(get_local $2)
)
)
)
(f32.mul
(tee_local $8
(f32.load offset=28
(get_local $1)
)
)
(tee_local $4
(f32.load offset=4
(get_local $2)
)
)
)
)
(f32.mul
(tee_local $21
(f32.load offset=44
(get_local $1)
)
)
(tee_local $5
(f32.load offset=8
(get_local $2)
)
)
)
)
(f32.mul
(tee_local $22
(f32.load offset=60
(get_local $1)
)
)
(tee_local $6
(f32.load offset=12
(get_local $2)
)
)
)
)
)
;; Group 1, stores at $0+8, +4, +0: reuse $3..$6 with the preloaded $1 values.
(f32.store offset=8
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $19)
(get_local $3)
)
(f32.mul
(get_local $20)
(get_local $4)
)
)
(f32.mul
(get_local $18)
(get_local $5)
)
)
(f32.mul
(get_local $17)
(get_local $6)
)
)
)
(f32.store offset=4
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $15)
(get_local $3)
)
(f32.mul
(get_local $16)
(get_local $4)
)
)
(f32.mul
(get_local $14)
(get_local $5)
)
)
(f32.mul
(get_local $13)
(get_local $6)
)
)
)
(f32.store
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $4)
)
)
(f32.mul
(get_local $10)
(get_local $5)
)
)
(f32.mul
(get_local $9)
(get_local $6)
)
)
)
;; Group 2, stores at $0+28..16: $3..$6 are reloaded from $2+16, +20, +24, +28.
(f32.store offset=28
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $7)
(tee_local $3
(f32.load offset=16
(get_local $2)
)
)
)
(f32.mul
(get_local $8)
(tee_local $4
(f32.load offset=20
(get_local $2)
)
)
)
)
(f32.mul
(get_local $21)
(tee_local $5
(f32.load offset=24
(get_local $2)
)
)
)
)
(f32.mul
(get_local $22)
(tee_local $6
(f32.load offset=28
(get_local $2)
)
)
)
)
)
(f32.store offset=24
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $19)
(get_local $3)
)
(f32.mul
(get_local $20)
(get_local $4)
)
)
(f32.mul
(get_local $18)
(get_local $5)
)
)
(f32.mul
(get_local $17)
(get_local $6)
)
)
)
(f32.store offset=20
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $15)
(get_local $3)
)
(f32.mul
(get_local $16)
(get_local $4)
)
)
(f32.mul
(get_local $14)
(get_local $5)
)
)
(f32.mul
(get_local $13)
(get_local $6)
)
)
)
(f32.store offset=16
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $4)
)
)
(f32.mul
(get_local $10)
(get_local $5)
)
)
(f32.mul
(get_local $9)
(get_local $6)
)
)
)
;; Group 3, stores at $0+44..32: $3..$6 are reloaded from $2+32, +36, +40, +44.
(f32.store offset=44
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $7)
(tee_local $3
(f32.load offset=32
(get_local $2)
)
)
)
(f32.mul
(get_local $8)
(tee_local $4
(f32.load offset=36
(get_local $2)
)
)
)
)
(f32.mul
(get_local $21)
(tee_local $5
(f32.load offset=40
(get_local $2)
)
)
)
)
(f32.mul
(get_local $22)
(tee_local $6
(f32.load offset=44
(get_local $2)
)
)
)
)
)
(f32.store offset=40
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $19)
(get_local $3)
)
(f32.mul
(get_local $20)
(get_local $4)
)
)
(f32.mul
(get_local $18)
(get_local $5)
)
)
(f32.mul
(get_local $17)
(get_local $6)
)
)
)
(f32.store offset=36
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $15)
(get_local $3)
)
(f32.mul
(get_local $16)
(get_local $4)
)
)
(f32.mul
(get_local $14)
(get_local $5)
)
)
(f32.mul
(get_local $13)
(get_local $6)
)
)
)
(f32.store offset=32
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $4)
)
)
(f32.mul
(get_local $10)
(get_local $5)
)
)
(f32.mul
(get_local $9)
(get_local $6)
)
)
)
;; Group 4, stores at $0+60..48. $3 is reloaded from $2+48; $7 and $8 are
;; read one last time as $1 values and are then overwritten by tee_local
;; with the f32 values at $2+52 and $2+56; $4 takes $2+60. The three stores
;; that follow therefore pair ($3, $7, $8, $4) with the preloaded $1 values.
(f32.store offset=60
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $7)
(tee_local $3
(f32.load offset=48
(get_local $2)
)
)
)
(f32.mul
(get_local $8)
(tee_local $7
(f32.load offset=52
(get_local $2)
)
)
)
)
(f32.mul
(get_local $21)
(tee_local $8
(f32.load offset=56
(get_local $2)
)
)
)
)
(f32.mul
(get_local $22)
(tee_local $4
(f32.load offset=60
(get_local $2)
)
)
)
)
)
(f32.store offset=56
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $19)
(get_local $3)
)
(f32.mul
(get_local $20)
(get_local $7)
)
)
(f32.mul
(get_local $18)
(get_local $8)
)
)
(f32.mul
(get_local $17)
(get_local $4)
)
)
)
(f32.store offset=52
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $15)
(get_local $3)
)
(f32.mul
(get_local $16)
(get_local $7)
)
)
(f32.mul
(get_local $14)
(get_local $8)
)
)
(f32.mul
(get_local $13)
(get_local $4)
)
)
)
(f32.store offset=48
(get_local $0)
(f32.add
(f32.add
(f32.add
(f32.mul
(get_local $11)
(get_local $3)
)
(f32.mul
(get_local $12)
(get_local $7)
)
)
(f32.mul
(get_local $10)
(get_local $8)
)
)
(f32.mul
(get_local $9)
(get_local $4)
)
)
)
)
// Multiplies two 4x4 float matrices stored as flat 16-element arrays.
//
// For r, c in 0..3:
//   out[4*r + c] = b[4*r]*a[c] + b[4*r + 1]*a[4 + c]
//                + b[4*r + 2]*a[8 + c] + b[4*r + 3]*a[12 + c]
//
// out: receives the 16 result elements
// a:   16 input elements, all read before the first write to out
// b:   16 input elements, read four at a time just before the matching
//      four elements of out are written
void multiply(float* out, float* a, float* b) {
    float lhs[16];

    // Snapshot every element of a up front, before anything is stored to out.
    for (int i = 0; i < 16; ++i) {
        lhs[i] = a[i];
    }

    for (int row = 0; row < 4; ++row) {
        const int base = row * 4;

        // Cache only the current line of the second matrix
        const float b0 = b[base];
        const float b1 = b[base + 1];
        const float b2 = b[base + 2];
        const float b3 = b[base + 3];

        for (int col = 0; col < 4; ++col) {
            out[base + col] =
                b0 * lhs[col] + b1 * lhs[4 + col] + b2 * lhs[8 + col] + b3 * lhs[12 + col];
        }
    }
}
/// Writes the product of the 4x4 matrices `a` and `b` into `out`.
///
/// Treating the sixteen tuple fields of each matrix as four consecutive groups
/// of four, field `4 * r + c` of `out` receives
/// `b[4r] * a[c] + b[4r + 1] * a[4 + c] + b[4r + 2] * a[8 + c] + b[4r + 3] * a[12 + c]`,
/// where `m[i]` stands for field `i` of `m`.
pub fn multiply(out: &mut Matrix4, a: &Matrix4, b: &Matrix4) {
    // Gather the fields of `a` by their position inside each group of four,
    // so every output field is a pairing of one `b` group with one of these.
    let col0 = [a.0, a.4, a.8, a.12];
    let col1 = [a.1, a.5, a.9, a.13];
    let col2 = [a.2, a.6, a.10, a.14];
    let col3 = [a.3, a.7, a.11, a.15];

    // Only the current group of four from `b` is held at a time.
    let row = [b.0, b.1, b.2, b.3];
    out.0 = row[0] * col0[0] + row[1] * col0[1] + row[2] * col0[2] + row[3] * col0[3];
    out.1 = row[0] * col1[0] + row[1] * col1[1] + row[2] * col1[2] + row[3] * col1[3];
    out.2 = row[0] * col2[0] + row[1] * col2[1] + row[2] * col2[2] + row[3] * col2[3];
    out.3 = row[0] * col3[0] + row[1] * col3[1] + row[2] * col3[2] + row[3] * col3[3];

    let row = [b.4, b.5, b.6, b.7];
    out.4 = row[0] * col0[0] + row[1] * col0[1] + row[2] * col0[2] + row[3] * col0[3];
    out.5 = row[0] * col1[0] + row[1] * col1[1] + row[2] * col1[2] + row[3] * col1[3];
    out.6 = row[0] * col2[0] + row[1] * col2[1] + row[2] * col2[2] + row[3] * col2[3];
    out.7 = row[0] * col3[0] + row[1] * col3[1] + row[2] * col3[2] + row[3] * col3[3];

    let row = [b.8, b.9, b.10, b.11];
    out.8 = row[0] * col0[0] + row[1] * col0[1] + row[2] * col0[2] + row[3] * col0[3];
    out.9 = row[0] * col1[0] + row[1] * col1[1] + row[2] * col1[2] + row[3] * col1[3];
    out.10 = row[0] * col2[0] + row[1] * col2[1] + row[2] * col2[2] + row[3] * col2[3];
    out.11 = row[0] * col3[0] + row[1] * col3[1] + row[2] * col3[2] + row[3] * col3[3];

    let row = [b.12, b.13, b.14, b.15];
    out.12 = row[0] * col0[0] + row[1] * col0[1] + row[2] * col0[2] + row[3] * col0[3];
    out.13 = row[0] * col1[0] + row[1] * col1[1] + row[2] * col1[2] + row[3] * col1[3];
    out.14 = row[0] * col2[0] + row[1] * col2[1] + row[2] * col2[2] + row[3] * col2[3];
    out.15 = row[0] * col3[0] + row[1] * col3[1] + row[2] * col3[2] + row[3] * col3[3];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment