View gist:1557145
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// a0 and a1 are row-major Array
// a_err holds, per column, the relative error |(a0 - a1) / a0| summed over
// the rows and divided by the row count.
// The member .abs() is used instead of std::abs(): the std-qualified call
// only resolves on Eigen releases that add array overloads to namespace std,
// whereas the member function is available on every Array expression.
Vector3f a_err = ((a0 - a1) / a0).abs().colwise().sum() / a0.rows();
View gist:1582789
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
v_fma_f64 v[29:30], v[39:40], v[41:42], v[29:30] // 0000025C: D298001D 04765327 | |
v_fma_f64 v[31:32], v[39:40], v[43:44], v[31:32] // 00000264: D298001F 047E5727 | |
v_fma_f64 v[33:34], v[39:40], v[45:46], v[33:34] // 0000026C: D2980021 04865B27 | |
v_fma_f64 v[35:36], v[39:40], v[47:48], v[35:36] // 00000274: D2980023 048E5F27 | |
s_branch label_002E // 0000027C: BF82FF8E |
View Tahiti_DGEMM.isa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
shader main | |
asic(SI_ASIC) | |
type(PS) | |
// s_ps_state in s0 | |
s_mov_b64 s[44:45], exec // 00000000: BEAC047E | |
s_wqm_b64 exec, exec // 00000004: BEFE0A7E | |
v_floor_f32 v0, v2 // 00000008: 7E004902 | |
v_floor_f32 v1, v3 // 0000000C: 7E024903 | |
v_mul_legacy_f32 v55, 2.0, v0 // 00000010: 0E6E00F4 |
View gist:1700860
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
shader main | |
asic(SI_ASIC) | |
type(PS) | |
// s_ps_state in s0 | |
s_mov_b64 s[44:45], exec // 00000000: BEAC047E | |
s_wqm_b64 exec, exec // 00000004: BEFE0A7E | |
s_load_dwordx4 s[12:15], s[10:11], 0x00 // 00000008: C0860B00 | |
v_floor_f32 v0, v2 // 0000000C: 7E004902 | |
v_floor_f32 v1, v3 // 00000010: 7E024903 |
View e_time.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <time.h> | |
#include <sys/time.h> | |
#include <sys/resource.h> | |
// How to use? | |
// | |
// double st = e_time(); | |
// ... the computation being timed goes here ... | |
// double en = e_time(); | |
// |
View sph_neighbor.cl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define NNMAX 16 | |
/* Sum of the squares of the x, y and z components of p.
 * The w component does not take part in the result. */
float R2(float4 p)
{
    const float xx = p.x * p.x;
    const float yy = p.y * p.y;
    const float zz = p.z * p.z;

    /* Same left-to-right summation order as a single expression. */
    return xx + yy + zz;
}
__kernel void sph_neighbor(__global float4 *pos, | |
__global float *size, | |
__global int *next, | |
__global int *more, |
View gravity_v4.cl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define READONLY_P const * restrict | |
__kernel | |
void | |
grav1( | |
__global float4 READONLY_P x, | |
__global float4 READONLY_P y, | |
__global float4 READONLY_P z, | |
__global float4 READONLY_P m, | |
__global float4 *ax, |
View gravity_v4v2.cl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define READONLY_P const * restrict | |
float4 sum(float8 x) | |
{ | |
float4 tmp; | |
tmp.x = x.s0 + x.s4; | |
tmp.y = x.s1 + x.s5; | |
tmp.z = x.s2 + x.s6; | |
tmp.w = x.s3 + x.s7; | |
return tmp; |
View html.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetches +url+ by running the external +curl+ program with --location and
# --compressed, then wraps whatever curl wrote to stdout in a new instance
# of the receiver.
#
# url - String passed to curl as its own argv entry (no shell is involved).
#
# Returns self.new(body), where body is curl's complete standard output.
def self.get(url)
  # capture3 closes the child's stdin and drains stdout and stderr together.
  # Reading only stdout from popen3 leaves stderr unread, so the call can
  # block forever once curl has filled that pipe.
  body, _stderr, _status = Open3.capture3("curl", "--location", "--compressed", url)
  return self.new(body)
end
View gravity_v4v2.asm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.file "/tmp/5d41e25b-e85e-4f10-836a-5b23eab3f6a7.TMP" | |
.text | |
# Exported function _Z12native_rsqrtDv8_f; the Itanium-mangled name decodes to
# native_rsqrt taking a vector of 8 floats.
.globl _Z12native_rsqrtDv8_f | |
# Align the entry point to 16 bytes, using 0x90 as the padding byte.
.align 16, 0x90 | |
.type _Z12native_rsqrtDv8_f,@function | |
_Z12native_rsqrtDv8_f: # @_Z12native_rsqrtDv8_f | |
# BB#0: | |
# Replace each packed single-precision lane of YMM0 with its approximate
# reciprocal square root, then return; YMM0 is both source and destination.
vrsqrtps YMM0, YMM0 | |
ret | |
.Ltmp0: |
OlderNewer