Skip to content

Instantly share code, notes, and snippets.

@pascaldekloe
Created December 19, 2023 21:46
Show Gist options
  • Save pascaldekloe/1d3e5e407dffb781fc2a31e2033441ea to your computer and use it in GitHub Desktop.
Save pascaldekloe/1d3e5e407dffb781fc2a31e2033441ea to your computer and use it in GitHub Desktop.
Experimental Integer Compression (Status Unknown)
.global _pack32dec64
.text
.align 2
// Pack32dec64 reads X2 bytes at address X1 and writes the 32 decoded
// 64-bit values to address X0. The first delta is applied against X3.
//
// In:    X0 = destination (32 × 8 bytes)
//        X1 = packed input
//        X2 = input size in bytes: 4 bytes per bit of width, so 0, 4, 8 or 12
//        X3 = value the first delta is applied against
// Clobb: X2, X4, X5, flags, plus whatever the selected decoder uses
//
// Only the bit widths 0–3 have an entry in decode_table. Any larger X2 would
// index past the table into unrelated code, so such sizes return right away
// without writing any output.
_pack32dec64:
	cmp	x2, 12			// 3 bits × 4 bytes is the widest table entry
	b.hi	decode_unsupported	// unsigned: also rejects huge/negative sizes
	and	x2, x2, 252		// drop the low 2 bits; keeps the target on an entry
	adr	x5, decode_table
	add	x5, x5, x2, lsl 1	// entries are 8 bytes (2 instructions) per 4 input bytes
	br	x5
decode_unsupported:
	ret
decode_table:
	adr	x4, dec0b64
	br	x4
	adr	x4, dec1b64
	br	x4
	adr	x4, dec2b64
	br	x4
	adr	x4, dec3b64
	br	x4
.align 2
// Dec0b64 handles the 0-bit width: there are no deltas, so each of the 32
// output slots receives the offset from X3 unchanged.
//
// In:  X0 = destination, X3 = offset value
// Out: X0 is advanced by 256 bytes (32 × 8)
dec0b64:
	// 16 paired stores of 2 values each make 32 values
	.rept	16
	stp	x3, x3, [x0], 16
	.endr
	ret
// Dec1b64 decodes 32 values from 32 bits of input: one bit per value.
//
// In:    X0 = destination, X1 = packed input (4 bytes), X3 = previous value
// Out:   X0 is advanced by 256 bytes; X3 holds the last decoded value
// Clobb: X2, X4
dec1b64: // 1-bit reverse delta zig-zag encoding
	ldr	w4, [x1]		// fetch all 4 bytes of input
	// A 1-bit zig-zag encoding of reverse delta is either 0 for
	// no change against the previous, or 1 for an increment.
	// The bit is extracted unsigned (0 or 1) such that the addition
	// increments; a sign-extending extract would give -1 and decrement.
	ubfx	x2, x4, 0, 1		// fetch bit #0
	add	x2, x2, x3		// against previous
	ubfx	x3, x4, 1, 1		// fetch bit #1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 2, 1
	add	x2, x2, x3
	ubfx	x3, x4, 3, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 4, 1
	add	x2, x2, x3
	ubfx	x3, x4, 5, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 6, 1
	add	x2, x2, x3
	ubfx	x3, x4, 7, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 8, 1
	add	x2, x2, x3
	ubfx	x3, x4, 9, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 10, 1
	add	x2, x2, x3
	ubfx	x3, x4, 11, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 12, 1
	add	x2, x2, x3
	ubfx	x3, x4, 13, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 14, 1
	add	x2, x2, x3
	ubfx	x3, x4, 15, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 16, 1
	add	x2, x2, x3
	ubfx	x3, x4, 17, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 18, 1
	add	x2, x2, x3
	ubfx	x3, x4, 19, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 20, 1
	add	x2, x2, x3
	ubfx	x3, x4, 21, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 22, 1
	add	x2, x2, x3
	ubfx	x3, x4, 23, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 24, 1
	add	x2, x2, x3
	ubfx	x3, x4, 25, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 26, 1
	add	x2, x2, x3
	ubfx	x3, x4, 27, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 28, 1
	add	x2, x2, x3
	ubfx	x3, x4, 29, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ubfx	x2, x4, 30, 1
	add	x2, x2, x3
	ubfx	x3, x4, 31, 1
	add	x3, x3, x2
	stp	x2, x3, [x0], 16
	ret
// Dec2b64 decodes 32 values from 64 bits of input: two bits per value.
//
// Value i occupies bits 2i and 2i+1. Bit 2i is the zig-zag sign and bit 2i+1
// the magnitude, so the codes 0, 1, 2, 3 map to the deltas 0, -1, 1, -2. Each
// delta is subtracted from the previous value (reverse delta).
//
// In:    X0 = destination, X1 = packed input (8 bytes), X3 = previous value
// Out:   X0 is advanced by 256 bytes; X3 holds the last decoded value
// Clobb: X2, X4, X5
dec2b64: // 2-bit reverse delta zig-zag encoding
	ldr	x4, [x1]		// fetch all 8 bytes of input
	sbfx	x5, x4, 0, 1		// sign-extend first bit (equal to NEG)
	ubfx	x2, x4, 1, 1		// shift extract remaining bit
	eor	x2, x2, x5
	sub	x2, x3, x2		// against previous
	sbfx	x5, x4, 2, 1		// again …
	ubfx	x3, x4, 3, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 4, 1
	ubfx	x2, x4, 5, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 6, 1
	ubfx	x3, x4, 7, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 8, 1
	ubfx	x2, x4, 9, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 10, 1
	ubfx	x3, x4, 11, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 12, 1
	ubfx	x2, x4, 13, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 14, 1
	ubfx	x3, x4, 15, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 16, 1
	ubfx	x2, x4, 17, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 18, 1
	ubfx	x3, x4, 19, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 20, 1
	ubfx	x2, x4, 21, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 22, 1
	ubfx	x3, x4, 23, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 24, 1
	ubfx	x2, x4, 25, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 26, 1
	ubfx	x3, x4, 27, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 28, 1
	ubfx	x2, x4, 29, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 30, 1
	ubfx	x3, x4, 31, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 32, 1
	ubfx	x2, x4, 33, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 34, 1
	ubfx	x3, x4, 35, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 36, 1
	ubfx	x2, x4, 37, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 38, 1		// sign of value 19 is bit 38, not bit 37
	ubfx	x3, x4, 39, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 40, 1
	ubfx	x2, x4, 41, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 42, 1
	ubfx	x3, x4, 43, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 44, 1
	ubfx	x2, x4, 45, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 46, 1
	ubfx	x3, x4, 47, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 48, 1
	ubfx	x2, x4, 49, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 50, 1
	ubfx	x3, x4, 51, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 52, 1
	ubfx	x2, x4, 53, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 54, 1
	ubfx	x3, x4, 55, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 56, 1
	ubfx	x2, x4, 57, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 58, 1
	ubfx	x3, x4, 59, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	sbfx	x5, x4, 60, 1
	ubfx	x2, x4, 61, 1
	eor	x2, x2, x5
	sub	x2, x3, x2
	sbfx	x5, x4, 62, 1
	ubfx	x3, x4, 63, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
	ret
// Dec3b64 is the entry point for the 3-bit width. It is not implemented:
// the routine returns immediately, without reading any input and without
// writing any output.
dec3b64: // 3-bit reverse delta zig-zag encoding
ret
// NLHTSP holds the separator characters used by Dump64: newline at +0,
// horizontal tab at +1 and space at +2.
NLHTSP:
	.ascii	"\n\t "
	.align	2

// Dump64 writes the (32) 64-bit values from the stack in hexadecimal to
// standard output: a tab, then each value followed by a space, then a newline.
//
// In:    the 32 values start at the caller's SP (lowest address first)
// Clobb: X0, X1, X2, X7, X13, X14, X16, LR is saved and restored,
//        plus the registers used by print_x7_hex
_dump64:
	sub	sp, sp, #16		// grow stack
	str	lr, [sp]		// persist procedure call link register

	// print tab indent
	mov	x0, 1			// file descriptor 1 is standard output
	adr	x1, NLHTSP+1		// data pointer
	mov	x2, #1			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	add	x13, sp, #16		// first 64-bit integer, above our 16-byte frame
	add	x14, sp, #16+32*8	// end: one past the last integer
dump_next:
	ldr	x7, [x13], 8		// load the value and step to the next one
	bl	print_x7_hex

	// print space suffix
	mov	x0, 1			// file descriptor 1 is standard output
	adr	x1, NLHTSP+2		// data pointer
	mov	x2, #1			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	// X13 already points to the next value. Continue only while it is
	// strictly below the end; addresses compare unsigned.
	cmp	x13, x14
	b.lo	dump_next

	// print newline end
	mov	x0, 1			// file descriptor 1 is standard output
	adr	x1, NLHTSP		// data pointer
	mov	x2, #1			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	ldr	lr, [sp]		// restore procedure call link register
	add	sp, sp, #16		// free stack
	ret
// Hex_dict maps a nibble value (0–15) to its lowercase hexadecimal digit.
hex_dict:
	.ascii	"0123456789abcdef"
	.align	2
// Print_x7_hex writes the value of X7 to standard output as 16 hexadecimal
// digits, most-significant nibble first.
//
// In:    X7 = value to print
// Clobb: X0, X1, X2, X5, X6, X8, X9, X16
print_x7_hex:
	adr	x6, hex_dict		// digit lookup table
	sub	sp, sp, #16		// grow stack: 16 bytes of text buffer
	mov	x8, sp			// write cursor into the buffer
	mov	x9, x7			// working copy; the next nibble sits in bits 63–60
	mov	x5, #16			// digits left to produce
print_x7_hex_digit:
	lsr	x2, x9, #60		// isolate the top nibble
	ldrb	w2, [x6, x2]		// map to digit
	strb	w2, [x8], #1		// append to the buffer
	lsl	x9, x9, #4		// move the next nibble to the top
	sub	x5, x5, #1
	cbnz	x5, print_x7_hex_digit

	mov	x0, 1			// file descriptor 1 is standard output
	mov	x1, sp			// data pointer
	mov	x2, #16			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall
	add	sp, sp, #16		// free stack
	ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment