@gamozolabs
Created April 19, 2019 21:56
Compare coverage implementation in the old Vectorized Emulator
/// Generate a hash using the values in Zmm0 and return Zmm0
///
/// Clobbers Zmm1!!!
fn hash_zmm0(outasm: &mut falkasm::AsmStream, conststore: &mut ConstStore)
{
    let kmask = Operand::KmaskRegister(KmaskType::Merge(KmaskReg::K1));

    let thirt = Membc(Some(R10), None, conststore.add_const(13)
        .unwind("Out of room for constant storage") as i64 * 4);
    let sevent = Membc(Some(R10), None, conststore.add_const(17)
        .unwind("Out of room for constant storage") as i64 * 4);
    let fifteen = Membc(Some(R10), None, conststore.add_const(15)
        .unwind("Out of room for constant storage") as i64 * 4);
    let rval = Membc(Some(R10), None, conststore.add_const(0xd21e9c0c)
        .unwind("Out of room for constant storage") as i64 * 4);

    /* Xor in a random value to start the hash */
    outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), rval]);

    for _ in 0..4 {
        /* zmm0 ^= zmm0 << 13 */
        outasm.vpsllvd(&[Vreg(Zmm1), kmask, Vreg(Zmm0), thirt]);
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);

        /* zmm0 ^= zmm0 >> 17 */
        outasm.vpsrlvd(&[Vreg(Zmm1), kmask, Vreg(Zmm0), sevent]);
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);

        /* zmm0 ^= zmm0 << 15 */
        outasm.vpsllvd(&[Vreg(Zmm1), kmask, Vreg(Zmm0), fifteen]);
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    }
}
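
/* Scalar sketch (illustrative only, not part of the original emitter) of
 * what hash_zmm0 computes independently in each 32-bit lane: four rounds of
 * xorshift-style mixing, seeded by xoring in the fixed random value loaded
 * from the constant store above.
 */
#[allow(dead_code)]
fn hash_zmm0_sketch(mut x: u32) -> u32 {
    /* Xor in a random value to start the hash */
    x ^= 0xd21e9c0c;

    for _ in 0..4 {
        x ^= x << 13;
        x ^= x >> 17;
        x ^= x << 15;
    }

    x
}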
fn hash_coverage_zmm0(outasm: &mut falkasm::AsmStream)
{
    let kmask = Operand::KmaskRegister(KmaskType::Merge(KmaskReg::K1));

    /* Store the hashes to the stack */
    outasm.vmovdqa32(&[Mem(Some(Rsp), None, 0x10), kmask, Vreg(Zmm0)]);

    /* Get the register mask */
    /* kmovw eax, k1 */
    outasm.raw_bytes(b"\xc5\xf8\x93\xc1");

    /* Dword counter */
    outasm.xor(&[Reg(Rcx), Reg(Rcx)]);

    outasm.label("daloop");

    /* Shift the kmask by 1. If the carry is not set then this component
     * is disabled and we should skip it.
     */
    outasm.shr(&[Reg(Rax), Imm(1)]);
    outasm.jnc(&[BranchShort("next_iter")]);

    /* Get the hash into edx */
    outasm.mov(&[Reg(Rdx), Mem(Some(Rsp), Some((Rcx, 4)), 0x10)]);

    /* Get the pointer to the hash coverage table */
    outasm.set_mode(AsmMode::Bits64);
    outasm.mov(&[Reg(Rbx), Mem(Some(Rsp), None, 64 * 4 + 8 + 0x10)]);
    outasm.set_mode(AsmMode::Bits32);

    /* Mask off the 'hash' we created for insertion into the table.
     * Currently we allocate 16 MiB for the table, which translates to
     * 128 Mbits (2^27) that we can insert into the table.
     */
    outasm.and(&[Reg(Rdx), Imm(0x7FFFFFF)]);

    /* Insert the hash into the table. This table is shared and this
     * operation is not atomic, so races are possible. But this is fine,
     * it just means we might double report things.
     */
    outasm.bts(&[Mem(Some(Rbx), None, 0), Reg(Rdx)]);

    /* If the entry was already in the table, skip reporting new
     * coverage information.
     */
    outasm.jc(&[BranchShort("already_in_table")]);

    /* Set a bit reporting that this vector component hit new coverage */
    outasm.bts(&[Mem(Some(Rsp), None, 64 * 4 + 0x10), Reg(Rcx)]);

    outasm.label("already_in_table");
    outasm.label("next_iter");

    /* If the result of the shift was zero, we're all done */
    outasm.test(&[Reg(Rax), Reg(Rax)]);
    outasm.jz(&[BranchShort("done")]);

    /* Advance the lane counter by 1 and loop again */
    outasm.add(&[Reg(Rcx), Imm(1)]);
    outasm.jmp(&[BranchShort("daloop")]);

    outasm.label("done");
}
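
/* Scalar sketch (illustrative only) of the loop emitted above: for each
 * enabled lane, mask its hash down to 27 bits (the 16 MiB table holds
 * 2^27 = 128M bits), set that bit in the shared table, and if the bit was
 * previously clear, flag the lane as having hit new coverage. The `table`
 * and `new_coverage` names are hypothetical stand-ins for the stack slots
 * used by the emitted code.
 */
#[allow(dead_code)]
fn hash_coverage_sketch(table: &mut [u8], kmask: u16, hashes: &[u32; 16],
                        new_coverage: &mut u16) {
    for lane in 0..16 {
        /* Skip lanes which are disabled in the kmask */
        if kmask & (1 << lane) == 0 { continue; }

        /* Mask the hash to the 2^27 bits the table can hold */
        let bit = (hashes[lane] & 0x7FFFFFF) as usize;

        /* Like the emitted bts, this is not atomic; races on the shared
         * table only cause double reporting. */
        let (byte, mask) = (bit / 8, 1u8 << (bit % 8));
        if table[byte] & mask == 0 {
            table[byte] |= mask;
            *new_coverage |= 1 << lane;
        }
    }
}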
{
    // Compare coverage
    //
    // Note: `kmask`, `tmpkmask`, `conststore`, and `jitcache` come from
    // the enclosing function, which is not part of this gist.
    let mut outasm = falkasm::AsmStream::new(AsmMode::Bits32);
    outasm.set_vecwidth(falkasm::VecWidth::Width512);

    let path_mask = Membc(Some(R10), None, conststore.add_const(LIGHT_PATH_MASK)
        .unwind("Out of room for constant storage") as i64 * 4);
    let one = Membc(Some(R10), None, conststore.add_const(1)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_000000ff = Membc(Some(R10), None, conststore.add_const(0xff)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_0000ff00 = Membc(Some(R10), None, conststore.add_const(0xff00)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_00ff0000 = Membc(Some(R10), None, conststore.add_const(0xff0000)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_ff000000 = Membc(Some(R10), None, conststore.add_const(0xff000000)
        .unwind("Out of room for constant storage") as i64 * 4);

    /* Zero out the per-lane matching-byte counter in Zmm2 */
    outasm.vpxord(&[Vreg(Zmm2), kmask, Vreg(Zmm2), Vreg(Zmm2)]);

    /* For each of the four bytes of the compared dwords in Zmm5 and Zmm6,
     * increment the counter in Zmm2 for every lane where that byte matches.
     */
    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_000000ff]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_000000ff]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_0000ff00]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_0000ff00]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_00ff0000]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_00ff0000]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_ff000000]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_ff000000]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);
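
    /* Each dword lane of Zmm2 now holds the number of matching bytes (0
     * through 4) between the corresponding lanes of Zmm5 and Zmm6. Scalar
     * sketch of the count for one lane (illustrative only):
     *
     *     (0..4).filter(|&i| (a >> (i * 8)) as u8 == (b >> (i * 8)) as u8)
     *         .count()
     */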

    /* Determine if all lanes got the same match count: broadcast lane 0's
     * count to every lane and compare it against Zmm2.
     */
    outasm.vpbroadcastd(&[Vreg(Zmm0), kmask, Vreg(Zmm2)]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm2)]);

    /* kxorw k2, k2, k1 */
    outasm.raw_bytes(b"\xc5\xec\x47\xd1");

    /* kortestw k2, k2 */
    outasm.raw_bytes(b"\xc5\xf8\x98\xd2");
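
    /* At this point k2 = k1 ^ cmp_result, i.e. the set of enabled lanes
     * whose count differs from lane 0's. kortestw sets ZF when k2 is zero,
     * so the jz below skips the coverage reporting when all enabled lanes
     * agree and there is nothing interesting to record.
     */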
    outasm.jz(&[BranchNear("regs_match")]);

    /* Convert the matching byte count into a hash */
    outasm.vmovdqa32(&[Vreg(Zmm0), kmask, Vreg(Zmm2)]);
    hash_zmm0(&mut outasm, conststore);

    /* Xor in Eip to the hash */
    outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0),
        Membc(Some(R9), None, 8 * 64)]);

    /* Rehash zmm0 to make sure Eip is shuffled in well */
    hash_zmm0(&mut outasm, conststore);

    if COMPUTE_PATH_HASH {
        /* Mask the path hash to limit the number of unique paths */
        outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm30), path_mask]);

        /* Xor in the path hash */
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    }

    /* Perform hash coverage on the value in Zmm0 */
    hash_coverage_zmm0(&mut outasm);

    outasm.label("regs_match");
    outasm.ret(&[]);

    jitcache.insert_callable_cache(
        (MemOpSize::Bits32, Some(Zmm31), Zmm31, Some(Zmm31)), outasm);
}
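
/* End-to-end scalar sketch (illustrative only) of the per-lane value that
 * the block above records when the lanes diverge, with `hash` standing in
 * for hash_zmm0 and `matching_bytes` for the byte-compare counting:
 *
 *     let mut h = hash(matching_bytes(a, b));
 *     h = hash(h ^ eip);
 *     if COMPUTE_PATH_HASH { h ^= path_hash & LIGHT_PATH_MASK; }
 *     // h is then inserted into the coverage bitmap by hash_coverage_zmm0
 */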