Skip to content

Instantly share code, notes, and snippets.

@dsh0005
Last active August 26, 2021 16:41
Show Gist options
  • Save dsh0005/858c1741b60421ce1299bdad81f7b1f2 to your computer and use it in GitHub Desktop.
Save dsh0005/858c1741b60421ce1299bdad81f7b1f2 to your computer and use it in GitHub Desktop.
CMOV cache side channel mitigation test
#include <stddef.h>
#include <stdint.h>
#include <array>
#ifdef __clang__
// Clang build: each helper comes in two flavours, one taking the source as a
// memory operand ("m") and one taking it in a register ("r"). Every operand
// uses a single constraint alternative here.
// In all templates below, "{a|b}" is the GCC/Clang assembler-dialect syntax:
// "a" is emitted for AT&T output, "b" for Intel output.
//
// decAndCMOVmem(ctr, dst, src): decrement ctr in place, then copy src (read
// straight from memory) into dst if the decrement left ctr at zero.
// ctr and dst are read-write register operands; the flags are clobbered.
#define decAndCMOVmem(ctr, dst, src) \
__asm__( \
"dec\t%[count]\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst), [count] "+r" (ctr)\
: [from] "m" (src)\
: "cc")
// decAndCMOVreg(ctr, dst, src): same as decAndCMOVmem, but src must already
// be in a register.
#define decAndCMOVreg(ctr, dst, src) \
__asm__( \
"dec\t%[count]\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst), [count] "+r" (ctr)\
: [from] "r" (src)\
: "cc")
// cmpAndCMOVmem(ctr, idx, dst, src): compare idx with ctr and copy src (a
// memory operand) into dst when they are equal. Neither ctr nor idx is
// modified; only dst is an output.
// NOTE(review): the "cmpq" mnemonic carries an AT&T size suffix outside the
// dialect braces, so this presumably only assembles in AT&T mode and with
// 64-bit ctr/idx - confirm.
#define cmpAndCMOVmem(ctr, idx, dst, src) \
__asm__( \
"cmpq\t{%[count], %[index]|%[index], %[count]}\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst)\
: [from] "m" (src), [count] "r" (ctr), [index] "r" (idx)\
: "cc")
// cmpAndCMOVreg(ctr, idx, dst, src): register-source version of
// cmpAndCMOVmem. Unlike the other helpers it spells the move as "cmovzl",
// i.e. with an explicit 32-bit suffix.
// NOTE(review): presumably dst and src must therefore be 32-bit values -
// confirm before using it with other widths.
#define cmpAndCMOVreg(ctr, idx, dst, src) \
__asm__( \
"cmpq\t{%[count], %[index]|%[index], %[count]}\n\t" \
"cmovzl\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst)\
: [from] "r" (src), [count] "r" (ctr), [index] "r" (idx)\
: "cc")
#else
// Non-Clang build: one macro per operation. The constraints let the compiler
// pick a register or a memory location, and the *mem / *reg names are plain
// aliases of the same macro.
//
// decAndCMOV(ctr, dst, src): decrement ctr in place (register or memory),
// then copy src (register or memory) into dst if ctr reached zero.
// NOTE(review): "dec" has no size suffix; if the compiler places ctr in
// memory there is no register to infer the operand size from in AT&T
// syntax - confirm this assembles for every ctr type in use.
#define decAndCMOV(ctr, dst, src) \
__asm__( \
"dec\t%[count]\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst), [count] "+rm" (ctr)\
: [from] "rm" (src)\
: "cc")
#define decAndCMOVmem decAndCMOV
#define decAndCMOVreg decAndCMOV
// cmpAndCMOV(ctr, idx, dst, src): compare idx with ctr and copy src into dst
// when they are equal. Two constraint alternatives are listed so that at most
// one of ctr/idx is a memory operand: (ctr "rm", idx "r") or
// (ctr "r", idx "rm"). dst is always a register.
#define cmpAndCMOV(ctr, idx, dst, src) \
__asm__( \
"cmp\t{%[count], %[index]|%[index], %[count]}\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r,r" (dst)\
: [from] "rm,rm" (src), [count] "rm,r" (ctr), [index] "r,rm" (idx)\
: "cc")
#define cmpAndCMOVmem cmpAndCMOV
#define cmpAndCMOVreg cmpAndCMOV
#endif
// 256-entry byte table used by the lookups in this file.
uint8_t sbox[256];

// Plain table lookup: returns sbox[i] with an ordinary indexed load.
// No bounds check is performed; the caller must pass i < 256.
uint8_t f (size_t i)
{
    const uint8_t *const entry = sbox + i;
    return *entry;
}
// Table lookup that reads one element from every bank of `arr` and then picks
// the wanted one with compare + conditional-move instead of indexing by the
// full `i`.
//
// Template parameters:
//   T        element type of the table
//   arr_len  number of elements in the table
//   arr      the table itself
//   cache_sz bank size in bytes (defaults to 64)
//   elems    elements per bank; must equal cache_sz / sizeof(T)
//   banks    number of banks; elems * banks must equal arr_len
//
// Parameter i: index into arr. The loads only depend on i % elems; the bank
// number i / elems is used solely as a comparison operand.
//
// Returns arr[i] converted to T for i < arr_len. For i >= arr_len no bank
// matches and the result is T(0).
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
template<typename T, size_t arr_len, T arr[arr_len], size_t cache_sz = 64, size_t elems = cache_sz/sizeof(T), size_t banks = arr_len/elems>
T noside_cond(size_t i){
    static_assert(elems == cache_sz/sizeof(T), "elems must be cache_sz / sizeof(T)");
    static_assert(elems*banks == arr_len, "arr_len must be a whole number of banks");
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_cond\n\t");
    int rest[banks];
    // val is a read-write ("+r") operand of every cmpAndCMOVreg below, so it
    // must hold a defined value before the first one runs. Zero is also the
    // result when no bank matches (i out of range).
    int val = 0;
    const size_t mod = i % elems;
    const size_t bank = i / elems;
    // Read the candidate element from every bank, whichever bank is wanted.
    for(size_t load_bank = 0; load_bank < banks; load_bank++){
        rest[load_bank] = arr[load_bank*elems + mod];
    }
    // Keep the candidate whose bank number equals `bank`.
    for(size_t ccmov_bank = 0; ccmov_bank < banks; ccmov_bank++){
        cmpAndCMOVreg(ccmov_bank, bank, val, rest[ccmov_bank]);
    }
    // Narrow to T while still inside the marked region.
    val = static_cast<T>(val);
    __asm__ __volatile__("# LLVM-MCA-END noside_cond\n\t");
    return static_cast<T>(val);
}
template uint8_t noside_cond<uint8_t, sizeof(sbox), sbox, 64>(size_t i);
// Table lookup that starts from the bank-0 element and then, for every
// further bank, decrements a counter and conditionally moves that bank's
// element straight from memory when the counter hits zero.
//
// Template parameters:
//   T        element type of the table
//   arr_len  number of elements in the table
//   arr      the table itself
//   cache_sz bank size in bytes (defaults to 64)
//   elems    elements per bank; must equal cache_sz / sizeof(T)
//   banks    number of banks; elems * banks must equal arr_len
//
// Parameter i: index into arr. Memory addresses only depend on i % elems;
// i / elems seeds the counter.
//
// Returns the selected element converted to T. If i / elems is 0 or is
// >= banks the counter never reaches zero inside the loop and the bank-0
// element is returned.
//
// NOTE(review): val is an int while the memory operand has type T. The cmov
// operand width presumably follows val's register, so with T narrower than
// int more bytes than sizeof(T) may be loaded (past the end of arr for the
// last elements of the last bank) - confirm and pad arr if so.
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
template<typename T, size_t arr_len, T arr[arr_len], size_t cache_sz = 64, size_t elems = cache_sz/sizeof(T), size_t banks = arr_len/elems>
T noside_asm_dec_mem (size_t i)
{
    static_assert(elems == cache_sz/sizeof(T), "elems must be cache_sz / sizeof(T)");
    static_assert(elems*banks == arr_len, "arr_len must be a whole number of banks");
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_asm_dec_mem\n\t");
    const size_t mod = i % elems;
    size_t bank = i / elems;
    // Bank 0 is the default; it stays selected when the counter starts at 0,
    // because the first decrement wraps it around instead of reaching zero.
    int val = arr[0*elems + mod];
    for(size_t ccmov_bank = 1; ccmov_bank < banks; ccmov_bank++){
        decAndCMOVmem(bank, val, arr[ccmov_bank*elems + mod]);
    }
    __asm__ __volatile__("# LLVM-MCA-END noside_asm_dec_mem\n\t");
    return static_cast<T>(val);
}
template uint8_t noside_asm_dec_mem<uint8_t, sizeof(sbox), sbox, 64>(size_t i);
#include <stddef.h>
#include <stdint.h>
#ifdef __clang__
// Clang build: separate macros for a memory source ("m") and a register
// source ("r"), each with a single constraint alternative.
// In the templates, "{a|b}" is the GCC/Clang assembler-dialect syntax:
// "a" is emitted for AT&T output, "b" for Intel output.
//
// decAndCMOVmem(ctr, dst, src): decrement ctr in place, then copy src (read
// straight from memory) into dst if the decrement left ctr at zero.
// ctr and dst are read-write register operands; the flags are clobbered.
#define decAndCMOVmem(ctr, dst, src) \
__asm__( \
"dec\t%[count]\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst), [count] "+r" (ctr)\
: [from] "m" (src)\
: "cc")
// decAndCMOVreg(ctr, dst, src): same as decAndCMOVmem, but src must already
// be in a register.
#define decAndCMOVreg(ctr, dst, src) \
__asm__( \
"dec\t%[count]\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst), [count] "+r" (ctr)\
: [from] "r" (src)\
: "cc")
#else
// Non-Clang build: a single macro whose constraints let the compiler choose
// a register or a memory location for ctr and src; the *mem / *reg names are
// plain aliases of it.
// NOTE(review): "dec" has no size suffix; if the compiler places ctr in
// memory there is no register to infer the operand size from in AT&T
// syntax - confirm this assembles for every ctr type in use.
#define decAndCMOV(ctr, dst, src) \
__asm__( \
"dec\t%[count]\n\t" \
"cmovz\t{%[from], %[to]|%[to], %[from]}" \
: [to] "+r" (dst), [count] "+rm" (ctr)\
: [from] "rm" (src)\
: "cc")
#define decAndCMOVmem decAndCMOV
#define decAndCMOVreg decAndCMOV
#endif
// Lookup tables used by the functions in this file: 256 bytes and
// 256 16-bit words.
uint8_t array[64*4];
uint16_t arr2[64*4];

// Plain table lookup: returns array[i] with an ordinary indexed load.
// No bounds check is performed; the caller must pass i < 256.
uint8_t f (size_t i)
{
    const uint8_t *const entry = array + i;
    return *entry;
}
// Looks up array[i] by treating `array` as 4 rows of 64 bytes: the byte at
// column i % 64 is loaded from all four rows, then one asm block compares the
// row number i / 64 against 1, 2 and 3 and conditionally moves the matching
// candidate into the result. Row 0's byte is the starting value, so it is
// also what comes back when i / 64 is not 1, 2 or 3.
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
uint8_t noside_asm (size_t i)
{
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_asm\n\t");
    const size_t bank = i / 64;
    // Points at the wanted column in row 0; the other rows are 64 bytes apart.
    const uint8_t *const cell = array + (i % 64);
    int val = cell[0*64];
    const int row1 = cell[1*64];
    const int row2 = cell[2*64];
    const int row3 = cell[3*64];
    // val is pinned to the "a" register and marked earlyclobber because it is
    // written by the first cmove while later inputs are still to be read.
    __asm__(
        "cmp{q|}\t{%[one], %[bank]|%[bank], %[one]}\n\t"
        "cmove{l|}\t{%[j], %[val]|%[val], %[j]}\n\t"
        "cmp{q|}\t{%[two], %[bank]|%[bank], %[two]}\n\t"
        "cmove{l|}\t{%[k], %[val]|%[val], %[k]}\n\t"
        "cmp{q|}\t{%[three], %[bank]|%[bank], %[three]}\n\t"
        "cmove{l|}\t{%[l], %[val]|%[val], %[l]}\n\t"
        : [val] "+&a" (val)
        : [j] "r" (row1), [k] "r" (row2), [l] "r" (row3), [bank] "r" (bank),
          [one] "n" (1), [two] "n" (2), [three] "n" (3)
        : "cc"
    ); // 49 bytes
    __asm__ __volatile__("# LLVM-MCA-END noside_asm\n\t");
    return static_cast<uint8_t>(val);
}
// Same selection as a compare/cmove chain over the four 64-byte rows of
// `array`, but the asm constraints additionally allow the candidates to be
// memory operands and list two alternatives for the row number: in memory
// (first alternative) or in a register (second alternative).
//
// Parameter i: index into array; the loads use only i % 64, and i / 64 is
// compared against 1, 2 and 3.
// Returns array[i] for i < 256; row 0's byte when i / 64 is not 1, 2 or 3.
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
uint8_t noside_mem_asm (size_t i)
{
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_mem_asm\n\t");
    int j, k, l;
    int val;
    const size_t mod = i % 64;
    const size_t bank = i / 64;
    // Load the candidate from every row before selecting.
    val = array[0*64 + mod];
    j = array[1*64 + mod];
    k = array[2*64 + mod];
    l = array[3*64 + mod];
    // val is written by the first cmove while k, l and bank are still to be
    // read, so it is marked earlyclobber ('&') in BOTH alternatives; the
    // modifier only applies to the alternative it is written in.
    __asm__(
        "cmp{q|}\t{%[one], %[bank]|%[bank], %[one]}\n\t"
        "cmove{l|}\t{%[j], %[val]|%[val], %[j]}\n\t"
        "cmp{q|}\t{%[two], %[bank]|%[bank], %[two]}\n\t"
        "cmove{l|}\t{%[k], %[val]|%[val], %[k]}\n\t"
        "cmp{q|}\t{%[three], %[bank]|%[bank], %[three]}\n\t"
        "cmove{l|}\t{%[l], %[val]|%[val], %[l]}\n\t"
        : [val] "+&a,&a" (val)
        : [j] "rm,rm" (j), [k] "rm,rm" (k), [l] "rm,rm" (l), [bank] "m,r" (bank),
          [one] "n,n" (1), [two] "n,n" (2), [three] "n,n" (3)
        : "cc"
    ); // 43 bytes
    __asm__ __volatile__("# LLVM-MCA-END noside_mem_asm\n\t");
    return val;
}
// Looks up array[i] over the four 64-byte rows of `array` using a countdown
// instead of compares: the row number i / 64 is decremented three times, and
// after each decrement the next row's candidate is conditionally moved into
// the result if the counter just reached zero. Row 0's byte is the starting
// value, so it is returned when the counter never hits zero (row 0, or a row
// number above 3).
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
uint8_t noside_asm_dec (size_t i)
{
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_asm_dec\n\t");
    // Points at the wanted column in row 0; the other rows are 64 bytes apart.
    const uint8_t *const cell = array + (i % 64);
    int bank = static_cast<int>(i / 64);
    int val = cell[0*64];
    const int row1 = cell[1*64];
    const int row2 = cell[2*64];
    const int row3 = cell[3*64];
    // Both val and bank are modified inside the block before all inputs have
    // been consumed, hence read-write earlyclobber operands.
    __asm__(
        "dec{l|}\t%[bank]\n\t"
        "cmovz{l|}\t{%[j], %[val]|%[val], %[j]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{l|}\t{%[k], %[val]|%[val], %[k]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{l|}\t{%[l], %[val]|%[val], %[l]}\n\t"
        : [val] "+&a" (val), [bank] "+&r" (bank)
        : [j] "r" (row1), [k] "r" (row2), [l] "r" (row3)
        : "cc"
    ); // 46 bytes
    __asm__ __volatile__("# LLVM-MCA-END noside_asm_dec\n\t");
    return static_cast<uint8_t>(val);
}
// Countdown-based lookup of array[i] over the four 64-byte rows of `array`,
// written as three separate decAndCMOVreg asm statements (one per row 1..3)
// rather than a single asm block. Each statement decrements the row counter
// and replaces the result with that row's candidate if the counter reached
// zero. Row 0's byte is the starting value.
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
uint8_t noside_asm_dec_split (size_t i)
{
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_asm_dec_split\n\t");
    // Points at the wanted column in row 0; the other rows are 64 bytes apart.
    const uint8_t *const cell = array + (i % 64);
    int countdown = static_cast<int>(i / 64);
    int picked = cell[0*64];
    int row1 = cell[1*64];
    int row2 = cell[2*64];
    int row3 = cell[3*64];
    decAndCMOVreg(countdown, picked, row1);
    decAndCMOVreg(countdown, picked, row2);
    decAndCMOVreg(countdown, picked, row3);
    __asm__ __volatile__("# LLVM-MCA-END noside_asm_dec_split\n\t");
    return static_cast<uint8_t>(picked);
}
// Countdown-based lookup in arr2, viewed as 8 banks of 32 uint16_t (64 bytes
// per bank). The bank-0 word at column i % 32 is the starting value; the asm
// block then decrements the bank counter seven times and, after each
// decrement, conditionally moves the word of the next bank directly from
// memory if the counter just reached zero.
//
// Parameter i: index into arr2; addresses depend only on i % 32, while
// i / 32 seeds the counter.
// Returns the selected word truncated to uint8_t (the return type is
// narrower than arr2's element type). The bank-0 word is used when the
// counter never reaches zero (bank 0, or a bank number above 7).
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
uint8_t noside_asm_dec_mem (size_t i)
{
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_asm_dec_mem\n\t");
    uint16_t val;
    const size_t mod = i % 32;
    int bank = i / 32;
    val = arr2[0*32 + mod];
    // Displacements are byte offsets of banks 1..7 (64 bytes each) and must
    // be identical in the AT&T and Intel spellings of each operand.
    __asm__(
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{64(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 64]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{128(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 128]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{192(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 192]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{256(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 256]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{320(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 320]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{384(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 384]}\n\t"
        "dec{l|}\t%[bank]\n\t"
        "cmovz{w|}\t{448(%[arr2],%[mod],2), %[val]|%[val], [%[arr2] + %[mod]*2 + 448]}\n\t"
        : [val] "+&a" (val), [bank] "+&r" (bank)
        // The trailing unnamed "m" operand is not referenced by the template;
        // it tells the compiler that the asm reads the contents of arr2, which
        // the pointer passed as [arr2] alone does not express.
        : [arr2] "r" (arr2), [mod] "r" (mod), "m" (arr2)
        : "cc"
    ); // ? bytes
    __asm__ __volatile__("# LLVM-MCA-END noside_asm_dec_mem\n\t");
    return val;
}
// Countdown-based lookup in arr2, viewed as 256/32 = 8 banks of 32 uint16_t.
// Starts from the bank-0 word at column i % 32 and issues one decAndCMOVmem
// asm statement per remaining bank: each decrements the bank counter and
// replaces the result with that bank's word, taken as a memory operand, if
// the counter reached zero.
//
// Returns the selected word truncated to uint8_t (the return type is
// narrower than arr2's element type).
//
// The "# LLVM-MCA-BEGIN/END" asm comments delimit the region for llvm-mca.
uint8_t noside_asm_dec_mem_split (size_t i)
{
    __asm__ __volatile__("# LLVM-MCA-BEGIN noside_asm_dec_mem_split\n\t");
    const size_t column = i % 32;
    int countdown = static_cast<int>(i / 32);
    uint16_t picked = arr2[column];
    size_t b = 1;
    while (b < 256/32) {
        decAndCMOVmem(countdown, picked, arr2[b*32 + column]);
        ++b;
    }
    __asm__ __volatile__("# LLVM-MCA-END noside_asm_dec_mem_split\n\t");
    return static_cast<uint8_t>(picked);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment