Skip to content

Instantly share code, notes, and snippets.

@edubart
Last active January 26, 2021 15:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edubart/565dec332f5ce4d59164c3236bcbb8dc to your computer and use it in GitHub Desktop.
Save edubart/565dec332f5ce4d59164c3236bcbb8dc to your computer and use it in GitHub Desktop.
rdtsc in luajit for x86_64
collectgarbage('stop') -- we don't want the GC to generate noise in the benchmarks
local function bench(f, n)
collectgarbage('collect')
n = n or 1000000
local rdtsc = require 'rdtsc'
local s = rdtsc()
for _=1,n do
f()
end
local e = rdtsc()
return tonumber(e - s) / n
end
local function cbench(name, ...)
-- warmup
for i=1,select('#', ...) do
local f = select(i, ...)
bench(f)
end
print('== '..name)
for i=1,select('#', ...) do
local f = select(i, ...)
print(string.format('f%d %.1f cycles', i, bench(f)))
end
end
do
-- version 1
local function f1()
os.time()
end
-- version 2
local os = require 'os'
local function f2()
os.time()
end
-- version 3
local time = require 'os'.time
local function f3()
time()
end
cbench('localize', f1, f2, f3)
end
if not jit then
return require 'sys'.rdtscp
end
local ffi = require'ffi'
local C = ffi.C
local function checkh(ptr) return assert(ptr ~= nil and ptr) end
local function checkz(ret) assert(ret == 0) end
local function checknz(ret) assert(ret ~= 0) end
local new, free, protect
--Using VirtualAlloc allows memory protection, but can only allocate memory in multiple-of-64K chunks.
local USE_VIRTUALALLOC = false
if ffi.os == 'Windows' then
if USE_VIRTUALALLOC then
ffi.cdef[[
void* VirtualAlloc(void* lpAddress, size_t dwSize, uint32_t flAllocationType, uint32_t flProtect);
int VirtualFree(void* lpAddress, size_t dwSize, uint32_t dwFreeType);
int VirtualProtect(void* lpAddress, size_t dwSize, uint32_t flNewProtect, uint32_t* lpflOldProtect);
]]
local PAGE_READWRITE = 0x04
local PAGE_EXECUTE_READ = 0x20
local MEM_COMMIT = 0x1000
local MEM_RESERVE = 0x2000
local MEM_RELEASE = 0x8000
function new(size)
return checkh(C.VirtualAlloc(nil, size, bit.bor(MEM_RESERVE, MEM_COMMIT), PAGE_READWRITE))
end
function protect(addr, size)
local oldprotect = ffi.new'uint32_t[1]' --because null not accepted
checknz(C.VirtualProtect(addr, size, PAGE_EXECUTE_READ, oldprotect))
end
function free(addr, size)
assert(size, 'size required') --on other platforms
checknz(C.VirtualFree(addr, 0, MEM_RELEASE))
end
else
local HEAP_NO_SERIALIZE = 0x00000001
local HEAP_ZERO_MEMORY = 0x00000008
local HEAP_CREATE_ENABLE_EXECUTE = 0x00040000
ffi.cdef[[
void* HeapCreate(uint32_t flOptions, size_t dwInitialSize, size_t dwMaximumSize);
void* HeapAlloc(void* hHeap, uint32_t dwFlags, size_t dwBytes);
int HeapFree(void* hHeap, uint32_t dwFlags, void* lpMem);
]]
local heap
function new(size)
heap = heap or checkh(C.HeapCreate(bit.bor(HEAP_NO_SERIALIZE, HEAP_CREATE_ENABLE_EXECUTE), 0, 0))
return checkh(C.HeapAlloc(heap, HEAP_ZERO_MEMORY, size))
end
function protect(addr, size) end
function free(addr, size)
assert(size, 'size required') --on other platforms
checknz(C.HeapFree(heap, HEAP_NO_SERIALIZE, addr))
end
end
elseif ffi.os == 'Linux' or ffi.os == 'BSD' or ffi.os == 'OSX' then
if ffi.os == 'OSX' then
ffi.cdef'typedef int64_t off_t;'
else
ffi.cdef'typedef long int off_t;'
end
ffi.cdef[[
void* mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);
int mprotect(void *addr, size_t len, int prot);
]]
local PROT_READ = 1
local PROT_WRITE = 2
local PROT_EXEC = 4
local MAP_PRIVATE = 2
local MAP_ANON = ffi.os == 'Linux' and 0x20 or 0x1000
function new(size)
local ret = C.mmap(nil, size, bit.bor(PROT_READ, PROT_WRITE), bit.bor(MAP_PRIVATE, MAP_ANON), -1, 0)
if ffi.cast('intptr_t', ret) == ffi.cast('intptr_t', -1) then
error(string.format('mmap errno: %d', ffi.errno()))
end
return checkh(ret)
end
function protect(addr, size)
checkz(C.mprotect(addr, size, bit.bor(PROT_READ, PROT_EXEC)))
end
function free(addr, size)
checkz(C.munmap(addr, size))
end
end
local code = {0x0F, 0x01, 0xF9, 0x48, 0xC1, 0xE2, 0x20, 0x48, 0x09, 0xD0, 0xC3}
local addr = new(#code)
local buf = ffi.cast('uint8_t*',addr)
for i=1,#code do
buf[i-1] = code[i]
end
protect(addr, #code)
local rdtscp = ffi.cast("uint64_t __cdecl (*)()", addr)
return rdtscp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment