Skip to content

Instantly share code, notes, and snippets.

@TrungNguyen1909
Last active January 10, 2024 08:46
Show Gist options
  • Save TrungNguyen1909/5b323edda9a21550a1621af506e8ce5f to your computer and use it in GitHub Desktop.
Save TrungNguyen1909/5b323edda9a21550a1621af506e8ce5f to your computer and use it in GitHub Desktop.
Apple H10 Mul53 extension
#if 0
Apple A11 (H10) introduces 2 proprietary instructions called mul53lo.2d and mul53hi.2d, both of which belong to the Mul53 extension.
Definitions:
- mul53lo.2d Vd, Vm: Multiplies the 2 53-bit doublewords in the Vd vector by the 2 53-bit doublewords in the Vm vector and stores the lowest 53 bits of each product in the Vd vector.
- mul53hi.2d Vd, Vm: Multiplies the 2 53-bit doublewords in the Vd vector by the 2 53-bit doublewords in the Vm vector and stores each product shifted right by 53 bits in the Vd vector.
Encodings:
- mul53lo.2d Vd, Vm: 0x00200000 | (m << 5) | (d << 0)
- mul53hi.2d Vd, Vm: 0x00200400 | (m << 5) | (d << 0)
#endif
#include <stdio.h>
#include <stdint.h>
/*
 * Load two 64-bit values into SIMD register v0.
 *
 * lo: value moved into lane D[0] of v0.
 * hi: value moved into lane D[1] of v0.
 *
 * The register is listed as a clobber so the compiler knows each asm
 * statement overwrites v0 and does not assume its previous contents survive.
 */
void write_v0(uint64_t lo, uint64_t hi) {
    __asm__ __volatile__("mov v0.D[0], %0\n" : /* no outputs */ : "r"(lo) : "v0");
    __asm__ __volatile__("mov v0.D[1], %0\n" : /* no outputs */ : "r"(hi) : "v0");
}
/*
 * Copy the current contents of SIMD register v0 out to memory.
 *
 * lo: receives lane D[0] of v0.
 * hi: receives lane D[1] of v0.
 */
void read_v0(uint64_t *lo, uint64_t *hi) {
    uint64_t lane0;
    uint64_t lane1;

    __asm__ __volatile__("mov %0, v0.D[0]\n" : "=r"(lane0) : : "memory");
    *lo = lane0;

    __asm__ __volatile__("mov %0, v0.D[1]\n" : "=r"(lane1) : : "memory");
    *hi = lane1;
}
/*
 * Load two 64-bit values into SIMD register v1.
 *
 * lo: value moved into lane D[0] of v1.
 * hi: value moved into lane D[1] of v1.
 *
 * The register is listed as a clobber so the compiler knows each asm
 * statement overwrites v1 and does not assume its previous contents survive.
 */
void write_v1(uint64_t lo, uint64_t hi) {
    __asm__ __volatile__("mov v1.D[0], %0\n" : /* no outputs */ : "r"(lo) : "v1");
    __asm__ __volatile__("mov v1.D[1], %0\n" : /* no outputs */ : "r"(hi) : "v1");
}
/*
 * Copy the current contents of SIMD register v1 out to memory.
 *
 * lo: receives lane D[0] of v1.
 * hi: receives lane D[1] of v1.
 */
void read_v1(uint64_t *lo, uint64_t *hi) {
    uint64_t lane0;
    uint64_t lane1;

    __asm__ __volatile__("mov %0, v1.D[0]\n" : "=r"(lane0) : : "memory");
    *lo = lane0;

    __asm__ __volatile__("mov %0, v1.D[1]\n" : "=r"(lane1) : : "memory");
    *hi = lane1;
}
/*
 * Exercise mul53lo.2d and mul53hi.2d with v1 as destination and v0 as the
 * second operand, then print the register contents before and after.
 *
 * The instructions are emitted as raw words using the encodings listed at the
 * top of this file (m = 0, d = 1), so this only runs on hardware that
 * implements them.
 *
 * NOTE(review): the test relies on v0/v1 keeping their contents across the
 * write_* / read_* helper calls. Nothing in the C code enforces that, so any
 * compiler-generated SIMD/FP code between those calls could overwrite them.
 * The locals below are intentionally left uninitialized for that reason.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    uint64_t a_lo, a_hi;
    uint64_t b_lo, b_hi;

    a_lo = 0x06050403020100;
    a_hi = 0x2020202020202;
    b_lo = 0x2020202020202;
    b_hi = 1;

    write_v0(a_lo, a_hi);
    write_v1(b_lo, b_hi);

    uint64_t lo0, hi0;
    uint64_t lo1_hi, hi1_hi;
    uint64_t lo1, hi1;
    uint64_t lo0_before, hi0_before;
    uint64_t lo1_before, hi1_before;

    /* Snapshot both registers before executing the instruction under test. */
    read_v0(&lo0_before, &hi0_before);
    read_v1(&lo1_before, &hi1_before);

    __asm__ __volatile__ (".long 0x00200001\n" : : : "v0", "v1"); /* mul53lo.2d v1, v0 */
    read_v0(&lo0, &hi0);
    read_v1(&lo1, &hi1);

    /* Reload the same operands so mul53hi sees the original inputs. */
    write_v0(a_lo, a_hi);
    write_v1(b_lo, b_hi);

    __asm__ __volatile__ (".long 0x00200401\n" : : : "v0", "v1"); /* mul53hi.2d v1, v0 */
    read_v1(&lo1_hi, &hi1_hi);

    /*
     * uint64_t is not unsigned long long on every platform, so cast
     * explicitly to match the %llx conversion.
     */
    printf("v0: 0x%llx 0x%llx\n",
           (unsigned long long)lo0_before, (unsigned long long)hi0_before); /* 0x6050403020100 0x2020202020202 */
    printf("v1: 0x%llx 0x%llx\n",
           (unsigned long long)lo1_before, (unsigned long long)hi1_before); /* 0x2020202020202 0x1 */
    printf("v0 after: 0x%llx 0x%llx\n",
           (unsigned long long)lo0, (unsigned long long)hi0);               /* 0x6050403020100 0x2020202020202 */
    printf("v1 after: 0x%llx 0x%llx\n",
           (unsigned long long)lo1, (unsigned long long)hi1);               /* 0xa1e140c060200 0x2020202020202 */
    printf("v1 after hi: 0x%llx 0x%llx\n",
           (unsigned long long)lo1_hi, (unsigned long long)hi1_hi);         /* 0x60b0f1214151 0x0 */

    /*
     * Cross-check of the low lane in Python:
     *
     *   >>> hex(0x06050403020100*0x02020202020202)
     *   '0xc161e24282a2a1e140c060200'
     *   >>> hex(0xa1e140c060200|(0x60b0f1214151<<53))
     *   '0xc161e24282a2a1e140c060200'
     */
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment