Skip to content

Instantly share code, notes, and snippets.

View syoyo's full-sized avatar
💗
ray tracing

Syoyo Fujita syoyo

💗
ray tracing
View GitHub Profile
$ cat test.n
(none
; This top-level code is dynamic and compiled to Lua code
(function printhello ()
; all statements are symbolic expressions
; known from Lisp and Scheme
(print "None is as dynamic as this!"))
(static mymul(a b)
@syoyo
syoyo / gist:23569871d2557a68f452
Created January 12, 2015 04:38
Simple pipeline analysis
// == sim start ==
00000001 : fmadd %f102, %f66, %f64, %f112
00000002 : fmadd %f2, %f6, %f4, %f64
-- STALL -- : f112(5), f64(6)
-- STALL -- : f112(4), f64(5)
-- STALL -- : f112(3), f64(4)
-- STALL -- : f112(2), f64(3)
-- STALL -- : f112(1), f64(2)
-- STALL -- : f64(1)
@syoyo
syoyo / gist:07dc264f4c5952a456be
Created April 4, 2015 15:48
exp() approximation in HPC-ACE (constant-value loading is not yet optimised)
#include <cstdio>
#include <cmath>
#include <emmintrin.h>
#define _mm_set1_pd(x) _mm_set_pd((x), (x))
// Based on http://www.chokkan.org/blog/archives/340
inline __m128d myexp(__m128d v)
{
@syoyo
syoyo / gist:2f89e50edd74d4179d03
Created April 4, 2015 16:39
SIMD log2() approximation function in HPC-ACE (not yet optimised)
// Based on glsl-sse2
inline __m128d mylog2(const __m128d v) {
int ibuf[4];
__m128d o = _mm_set_pd(1.0, 1.0);
__m128i infVal = _mm_set_epi32(0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000);
__m128d c = *(reinterpret_cast<__m128d*>(&infVal));
__m128d f = _mm_sub_pd(_mm_or_pd(_mm_andnot_pd(c, v),
_mm_and_pd(c, o)), o);
//const __m128i iVal = *(reinterpret_cast<const __m128i*>(&v));
//__m128i a = _mm_sub_epi32(_mm_srli_epi32(iVal, 20),
@syoyo
syoyo / gist:d80cb6f9936aa5f290da
Created April 18, 2015 16:45
exp(x) using trigonometric function in HPC-ACE
#include <cstdio>
#include <cmath>
#include <emmintrin.h>
#define _mm_set1_pd(x) _mm_set_pd((x), (x))
// Probably somewhat faster.
inline __m128d fastexp(__m128d v)
{
const __m128d inv_log2 = _mm_set1_pd(1.4426950408889634073599);
PRE_ALIGN(128) struct __vec8_d {
__vec8_d() { }
FORCEINLINE __vec8_d(const double v0) {
u.v0 = _mm_set_pd(v0, v0);
u.v1 = _mm_set_pd(v0, v0);
u.v2 = _mm_set_pd(v0, v0);
u.v3 = _mm_set_pd(v0, v0);
}
@syoyo
syoyo / gist:a8ba3b6fb1f2d51e4fff
Created May 4, 2015 16:15
1/x with -Kfast,nounroll,noswp
L21:
.LSSN2:
/* 12 */ frcpad %f0,%f32
/* 12 */ sethi %h44(.LR0),%g1
/* 12 */ or %g1,%m44(.LR0),%g1
/* 12 */ sllx %g1,12,%g1
@syoyo
syoyo / gist:78f6ddb6f99deb49f8ae
Created May 6, 2015 15:41
fmath performance (2015/05/06)
# K frontend
$ FCCpx -Kfast,nounroll,noswp exp_bench.cpp fmath.cpp
exp_bench.cpp:
fmath.cpp:
"fmath.cpp", line 30: warning: variable "fmath::local::LOG_TABLE_SIZE" was declared but never referenced
const size_t LOG_TABLE_SIZE = 12;
^
@syoyo
syoyo / gist:861c3d78ffc6ac5aabd0
Created May 7, 2015 10:03
Disassembly of double fmath::exp(double) on SPARC/HPC-ACE
_ZN5fmath3expEd:
.LLFB2:
.L46:
.LSSN28:
/* 167 */ add %sp,-208,%sp
.L47:
.LSSN29:
@syoyo
syoyo / gist:1516aa3e8e5489871fdd
Created May 8, 2015 06:59
double2 exp(double2) performance on SPARC/HPC-ACE
#include <cmath>
#include <emmintrin.h>
#define _mm_set1_pd(x) _mm_set_pd((x), (x))
#define FORCE_INLINE __attriabute__((force_inline))
// Compute exp(x) using trigonometric function.
inline __m128d exp_tri(__m128d v)
{