This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// a0 and a1 are row-major Arrays of the same shape.
// Per-column mean of the element-wise relative error |(a0 - a1) / a0|.
// The member abs() is used rather than std::abs(): newer Eigen releases no
// longer provide std:: overloads for Array expressions, so the member form is
// the portable spelling. Entries of a0 equal to zero still yield inf/NaN.
Vector3f a_err = ((a0 - a1) / a0).abs().colwise().sum() / a0.rows();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
v_fma_f64 v[29:30], v[39:40], v[41:42], v[29:30] // 0000025C: D298001D 04765327 | |
v_fma_f64 v[31:32], v[39:40], v[43:44], v[31:32] // 00000264: D298001F 047E5727 | |
v_fma_f64 v[33:34], v[39:40], v[45:46], v[33:34] // 0000026C: D2980021 04865B27 | |
v_fma_f64 v[35:36], v[39:40], v[47:48], v[35:36] // 00000274: D2980023 048E5F27 | |
s_branch label_002E // 0000027C: BF82FF8E |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
shader main | |
asic(SI_ASIC) | |
type(PS) | |
// s_ps_state in s0 | |
s_mov_b64 s[44:45], exec // 00000000: BEAC047E | |
s_wqm_b64 exec, exec // 00000004: BEFE0A7E | |
v_floor_f32 v0, v2 // 00000008: 7E004902 | |
v_floor_f32 v1, v3 // 0000000C: 7E024903 | |
v_mul_legacy_f32 v55, 2.0, v0 // 00000010: 0E6E00F4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
shader main | |
asic(SI_ASIC) | |
type(PS) | |
// s_ps_state in s0 | |
s_mov_b64 s[44:45], exec // 00000000: BEAC047E | |
s_wqm_b64 exec, exec // 00000004: BEFE0A7E | |
s_load_dwordx4 s[12:15], s[10:11], 0x00 // 00000008: C0860B00 | |
v_floor_f32 v0, v2 // 0000000C: 7E004902 | |
v_floor_f32 v1, v3 // 00000010: 7E024903 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <time.h> | |
#include <sys/time.h> | |
#include <sys/resource.h> | |
// How to use? | |
// | |
// double st = e_time(); | |
// ... the computation being timed goes here ... | |
// double en = e_time(); | |
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define NNMAX 16 | |
/* Sum of squares of the x, y and z components of p; p.w is not read. */
float R2(float4 p)
{
    const float xx = p.x * p.x;
    const float yy = p.y * p.y;
    const float zz = p.z * p.z;
    return xx + yy + zz;
}
__kernel void sph_neighbor(__global float4 *pos, | |
__global float *size, | |
__global int *next, | |
__global int *more, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define READONLY_P const * restrict | |
__kernel | |
void | |
grav1( | |
__global float4 READONLY_P x, | |
__global float4 READONLY_P y, | |
__global float4 READONLY_P z, | |
__global float4 READONLY_P m, | |
__global float4 *ax, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define READONLY_P const * restrict | |
float4 sum(float8 x) | |
{ | |
float4 tmp; | |
tmp.x = x.s0 + x.s4; | |
tmp.y = x.s1 + x.s5; | |
tmp.z = x.s2 + x.s6; | |
tmp.w = x.s3 + x.s7; | |
return tmp; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetches +url+ by running curl (following redirects, with compressed
# transfer enabled) and passes curl's standard output to +new+.
#
# Open3.capture3 reads stdout and stderr concurrently, so output on stderr
# (for example curl's progress meter) cannot fill its pipe and stall the
# read of the body. As before, stderr and the exit status are discarded and
# the arguments are passed as a list, so no shell is involved.
def self.get(url)
  body, _stderr, _status = Open3.capture3("curl", "--location", "--compressed", url)
  new(body)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.file "/tmp/5d41e25b-e85e-4f10-836a-5b23eab3f6a7.TMP" | |
.text | |
.globl _Z12native_rsqrtDv8_f | |
.align 16, 0x90 | |
.type _Z12native_rsqrtDv8_f,@function | |
_Z12native_rsqrtDv8_f: # @_Z12native_rsqrtDv8_f | |
# BB#0: | |
vrsqrtps YMM0, YMM0 | |
ret | |
.Ltmp0: |
OlderNewer