Last active
July 27, 2020 07:47
-
-
Save coldnight/66f62654952bb707ff89b49ba261860c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !plan9

#include "textflag.h"
// NOTE: Windows externalthreadhandler expects memclr to preserve DX.
// (The code below only touches DI, BX, AX and X0.)

// void runtime·memclr(void* ptr, uintptr n)
//
// Writes zero to the n bytes starting at ptr.
//
// References:
//
//   - https://quasilyte.github.io/blog/post/go-asm-complementary-reference/
//   - https://gocn.vip/article/733
//   - https://github.com/golang/arch/blob/master/x86/x86.csv
//   - https://golang.org/doc/asm
//
// Notes on the TEXT line:
//
//   - TEXT is how Plan 9 style assembly declares a function.
//   - The middle dot (·) can be typed on a Mac with Option+Shift+9.
//   - The package name comes before the ·, the function name after it.
//   - SB (pseudo-register) is the static base pointer. Functions and global
//     variables are named relative to it (here it names a function).
//   - NOSPLIT is defined in
//     https://github.com/golang/go/blob/master/src/runtime/textflag.h
//   - $0-16:
//       + $0: the stack frame is 0 bytes (locals plus any space needed for
//         the arguments of called functions; the return address pushed by a
//         call is not counted).
//       + 16: total size of arguments and results. Here that is two 8-byte
//         (quadword) arguments, ptr and n, and no result.
TEXT runtime·memclr(SB), NOSPLIT, $0-16
	// FP (pseudo-register): arguments are referenced as `symbol+offset(FP)`.
	// The symbol only aids readability, but it cannot be omitted.
	// FP is a virtual frame pointer supplied by the assembler, not the
	// hardware BP register; 0(FP) is the first argument.
	MOVQ	ptr+0(FP), DI	// DI = ptr, the current write position
	MOVQ	n+8(FP), BX	// BX = n, the number of bytes still to clear
	XORQ	AX, AX		// AX = 0, the value stored by the cases up to 16 bytes

	// MOVOU seems always faster than REP STOSQ.
tail:
	// Dispatch on the remaining byte count in BX (unsigned comparisons).
	// Every sized case writes one group of stores at DI and one group that
	// ends exactly at DI+BX. The two groups may overlap, which is harmless
	// because every byte written is zero.
	TESTQ	BX, BX		// sets ZF when BX == 0
	JEQ	_0		// nothing left to clear
	CMPQ	BX, $2
	JBE	_1or2		// 1 <= BX <= 2
	CMPQ	BX, $4
	JBE	_3or4		// 3 <= BX <= 4
	CMPQ	BX, $8
	JBE	_5through8	// 5 <= BX <= 8
	CMPQ	BX, $16
	JBE	_9through16	// 9 <= BX <= 16
	// Above 16 bytes the 128-bit register X0 is used.
	// PXOR X0, X0 sets X0 to zero.
	PXOR	X0, X0
	CMPQ	BX, $32
	JBE	_17through32
	CMPQ	BX, $64
	JBE	_33through64
	CMPQ	BX, $128
	JBE	_65through128
	CMPQ	BX, $256
	JBE	_129through256
	// TODO: use branch table and BSR to make this just a single dispatch
	// TODO: for really big clears, use MOVNTDQ.

	// More than 256 bytes remain: clear 256 bytes per iteration.
loop:
	// MOVOU is the Go assembler name for AT&T/Intel MOVDQU, an unaligned
	// 128-bit (double quadword) move.
	// X0 is the SSE register called %xmm0 in AT&T/Intel syntax.
	// See https://zh.wikipedia.org/wiki/SSE
	MOVOU	X0, 0(DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	SUBQ	$256, BX	// 256 fewer bytes remain
	ADDQ	$256, DI	// advance the write position past them
	CMPQ	BX, $256
	JAE	loop		// at least 256 bytes remain: go around again
	JMP	tail		// fewer than 256 (possibly 0) remain: dispatch by size

_1or2:
	// One byte at the start and one at the end; for BX == 1 they coincide.
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET
_0:
	RET
_3or4:
	// Two bytes at the start and the last two bytes.
	MOVW	AX, (DI)
	MOVW	AX, -2(DI)(BX*1)
	RET
_5through8:
	// Four bytes at the start and the last four bytes.
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET
_9through16:
	// Eight bytes at the start and the last eight bytes.
	MOVQ	AX, (DI)
	MOVQ	AX, -8(DI)(BX*1)
	RET
_17through32:
	// Sixteen bytes at the start and the last sixteen bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_33through64:
	// The first 32 bytes and the last 32 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_65through128:
	// The first 64 bytes and the last 64 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_129through256:
	// The first 128 bytes and the last 128 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment