Skip to content

Instantly share code, notes, and snippets.

@klauspost
Last active August 29, 2015 14:23
Show Gist options
  • Save klauspost/4283f3bd450376e52eb6 to your computer and use it in GitHub Desktop.
Save klauspost/4283f3bd450376e52eb6 to your computer and use it in GitHub Desktop.
go memset (byte slice)
package memset
func memset(dst []byte, value byte)
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !plan9
#include "textflag.h"
// NOTE: Windows externalthreadhandler expects memclr to preserve DX.
// void memset(dst []byte, value byte)
TEXT ·memset(SB), NOSPLIT, $0-25
XORQ AX,AX // clear upper bytes
MOVQ $0x0101010101010101, R8 // Multiply input by this to spread to all values
MOVQ dst+0(FP), DI
MOVQ dst_len+8(FP), BX
MOVB value+24(FP), AX
IMULQ R8, AX
MOVQ AX, X0 // Move to XMM0
PUNPCKLQDQ X0, X0 // Unpack to upper bits (copy lower 64 bit to high)
// MOVOU seems always faster than REP STOSQ.
clr_tail:
TESTQ BX, BX
JEQ clr_0
CMPQ BX, $2
JBE clr_1or2
CMPQ BX, $4
JBE clr_3or4
CMPQ BX, $8
JBE clr_5through8
CMPQ BX, $16
JBE clr_9through16
CMPQ BX, $32
JBE clr_17through32
CMPQ BX, $64
JBE clr_33through64
CMPQ BX, $128
JBE clr_65through128
CMPQ BX, $256
JBE clr_129through256
// TODO: use branch table and BSR to make this just a single dispatch
// TODO: for really big clears, use MOVNTDQ.
clr_loop:
MOVOU X0, 0(DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
MOVOU X0, 48(DI)
MOVOU X0, 64(DI)
MOVOU X0, 80(DI)
MOVOU X0, 96(DI)
MOVOU X0, 112(DI)
MOVOU X0, 128(DI)
MOVOU X0, 144(DI)
MOVOU X0, 160(DI)
MOVOU X0, 176(DI)
MOVOU X0, 192(DI)
MOVOU X0, 208(DI)
MOVOU X0, 224(DI)
MOVOU X0, 240(DI)
SUBQ $256, BX
ADDQ $256, DI
CMPQ BX, $256
JAE clr_loop
JMP clr_tail
clr_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
clr_0:
RET
clr_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
clr_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
clr_9through16:
MOVQ AX, (DI)
MOVQ AX, -8(DI)(BX*1)
RET
clr_17through32:
MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1)
RET
clr_33through64:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
clr_65through128:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
MOVOU X0, 48(DI)
MOVOU X0, -64(DI)(BX*1)
MOVOU X0, -48(DI)(BX*1)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
clr_129through256:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
MOVOU X0, 48(DI)
MOVOU X0, 64(DI)
MOVOU X0, 80(DI)
MOVOU X0, 96(DI)
MOVOU X0, 112(DI)
MOVOU X0, -128(DI)(BX*1)
MOVOU X0, -112(DI)(BX*1)
MOVOU X0, -96(DI)(BX*1)
MOVOU X0, -80(DI)(BX*1)
MOVOU X0, -64(DI)(BX*1)
MOVOU X0, -48(DI)(BX*1)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
package memset
import (
"testing"
)
func TestByteArray(t *testing.T) {
for size := 0; size < 1000; size++ {
b := make([]byte, size)
for i := range b {
b[i] = byte(i)
}
memset(b, 31)
for i := range b {
if b[i] != 31 {
t.Fatal("Expected value at index", i, "to be 31, it was", b[i])
}
}
// Test sub-slice
if size < 75 {
continue
}
// Reset
for i := range b {
b[i] = byte(i)
}
memset(b[35:75], 65)
for i := range b {
if i >= 35 && i < 75 {
if b[i] != 65 {
t.Log("Expected value at index", i, "to be 65, it was", b[i])
}
} else if b[i] != byte(i) {
t.Fatal("Expected value at index", i, "to be ", i, ", it was", b[i])
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment