Skip to content

Instantly share code, notes, and snippets.

View 9il's full-sized avatar
☀️

Ilia Ki 9il

☀️
  • Bangkok
View GitHub Profile
@9il
9il / haswell_128.s
Created September 23, 2016 07:04
haswell code for 128-bit vectors (OK)
.section __TEXT,__text,regular,pure_instructions
.globl __D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG4fTfZ13dot_reg_basicFNbNiPxG1G2NhG4fPxG6G1fmKG6G1G2NhG4fZPxG1G2NhG4f
.weak_definition __D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG4fTfZ13dot_reg_basicFNbNiPxG1G2NhG4fPxG6G1fmKG6G1G2NhG4fZPxG1G2NhG4f
.p2align 4, 0x90
__D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG4fTfZ13dot_reg_basicFNbNiPxG1G2NhG4fPxG6G1fmKG6G1G2NhG4fZPxG1G2NhG4f:
.cfi_startproc
movq %rsi, %rax
shlq $5, %rax
addq %rcx, %rax
vxorps %xmm8, %xmm8, %xmm8
@9il
9il / cannonlake_512.s
Created September 23, 2016 07:02
cannonlake code for 512-bit vectors (broken)
.section __TEXT,__text,regular,pure_instructions
.globl __D3mir4glas8internal4gemm48__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG16fTfZ13dot_reg_basicFNbNiPxG1G2NhG16fPxG6G1fmKG6G1G2NhG16fZPxG1G2NhG16f
.weak_definition __D3mir4glas8internal4gemm48__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG16fTfZ13dot_reg_basicFNbNiPxG1G2NhG16fPxG6G1fmKG6G1G2NhG16fZPxG1G2NhG16f
.p2align 4, 0x90
__D3mir4glas8internal4gemm48__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG16fTfZ13dot_reg_basicFNbNiPxG1G2NhG16fPxG6G1fmKG6G1G2NhG16fZPxG1G2NhG16f:
.cfi_startproc
subq $376, %rsp
Ltmp0:
.cfi_def_cfa_offset 384
movq %rsi, %rax
@9il
9il / cannonlake_256.s
Created September 23, 2016 07:00
cannonlake code for 256-bit vectors (broken)
.section __TEXT,__text,regular,pure_instructions
.globl __D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG8fTfZ13dot_reg_basicFNbNiPxG1G2NhG8fPxG6G1fmKG6G1G2NhG8fZPxG1G2NhG8f
.weak_definition __D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG8fTfZ13dot_reg_basicFNbNiPxG1G2NhG8fPxG6G1fmKG6G1G2NhG8fZPxG1G2NhG8f
.p2align 4, 0x90
__D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG8fTfZ13dot_reg_basicFNbNiPxG1G2NhG8fPxG6G1fmKG6G1G2NhG8fZPxG1G2NhG8f:
.cfi_startproc
subq $24, %rsp
Ltmp0:
.cfi_def_cfa_offset 32
movq %rsi, %rax
@9il
9il / haswell_256.s
Created September 23, 2016 06:59
haswell code for 256-bit vectors (OK)
.section __TEXT,__text,regular,pure_instructions
.globl __D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG8fTfZ13dot_reg_basicFNbNiPxG1G2NhG8fPxG6G1fmKG6G1G2NhG8fZPxG1G2NhG8f
.weak_definition __D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG8fTfZ13dot_reg_basicFNbNiPxG1G2NhG8fPxG6G1fmKG6G1G2NhG8fZPxG1G2NhG8f
.p2align 4, 0x90
__D3mir4glas8internal4gemm47__T13dot_reg_basicVmi1Vmi1Vmi1Vmi2Vmi6TNhG8fTfZ13dot_reg_basicFNbNiPxG1G2NhG8fPxG6G1fmKG6G1G2NhG8fZPxG1G2NhG8f:
.cfi_startproc
movq %rsi, %rax
shlq $6, %rax
addq %rcx, %rax
vxorps %ymm0, %ymm0, %ymm0
@9il
9il / mir.glas.gemv.d
Created August 10, 2016 09:46
mir.glas.gemv.d
/++
$(H2 General Matrix-Vector Multiplication)
$(SCRIPT inhibitQuickIndex = 1;)
This is a submodule of $(LINK2 mir_glas.html, mir.glas).
License: $(LINK2 http://boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: Ilya Yaroshenko
@9il
9il / mir.glas.dot.d
Created August 10, 2016 09:45
mir.glas.dot.d
/++
$(H2 Dot Product)
$(SCRIPT inhibitQuickIndex = 1;)
This is a submodule of $(LINK2 mir_glas.html, mir.glas).
License: $(LINK2 http://boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: Ilya Yaroshenko
@9il
9il / cpuid.report.txt
Created July 11, 2016 16:05
cpuid output example
$ dub fetch cpuid
$ dub test cpuid
################ Unified Information ################
Cores per CPU = 4
Threads per CPU = 8
------------------ TLB Information ------------------
Instruction TLB:
- - - - - ITLB1: - - - - - - - - - - - - - - - - - -
vbroadcastsd %xmm0, %ymm0
vmulpd (%rcx), %ymm0, %ymm7
vmulpd 32(%rcx), %ymm0, %ymm6
vmulpd 64(%rcx), %ymm0, %ymm5
vmulpd 96(%rcx), %ymm0, %ymm4
vmulpd 128(%rcx), %ymm0, %ymm3
vmulpd 160(%rcx), %ymm0, %ymm2
vmulpd 192(%rcx), %ymm0, %ymm1
vmulpd 224(%rcx), %ymm0, %ymm0
movq %rsi, %rax
@9il
9il / 12x3 double kernel.asm
Created May 10, 2016 07:27
gemmMicroKernel!(Conj.none, No.add, 1, 3, 3, __vector(double[4]), double);
.cfi_startproc
pushq %r14
Ltmp0:
.cfi_def_cfa_offset 16
pushq %rbx
Ltmp1:
.cfi_def_cfa_offset 24
Ltmp2:
.cfi_offset %rbx, -24
Ltmp3:
/**
Vibe-based AWS client
*/
module vibe.aws.aws;
import std.algorithm;
import std.datetime;
import std.random;
import std.range;