Created
July 24, 2021 17:52
-
-
Save jin-x/88358e9bb1d58d01b7a318d0873208e3 to your computer and use it in GitHub Desktop.
Code Speed Measurement Tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Speed Test for Windows x64, v1.01 / fasm 1 | |
; (c) 2020 Jin X (jin_x@list.ru) | |
format PE64 Console 5.0 | |
include 'win64axp.inc' | |
define REQUIRE_ADMIN_RIGHTS 1 ; 1 - run with the highest (realtime) priority, 0 - run with just high priority | |
;-- CODE SECTION ------------------------------------------------------------------------------------------------------- | |
.code | |
; Pseudo-handles: the Win32 calls below accept -1 for "current process" and -2 for
; "current thread" (the values GetCurrentProcess / GetCurrentThread stand for).
SELF_PROCESS = -1
SELF_THREAD  = -2

; Benchmark one procedure: print <banner>, time <target> through SpeedTestMsg, then print
; the tick count, followed by the migration note when SpeedTestMsg reports a CPU switch.
macro run_speed_test target, banner
{
        stdcall SpeedTestMsg, target, banner, <' %llu ticks%s',10>, ' (CPU migration is detected)'
}

entry:
    frame
        ; Reduce measurement noise: pin the process to CPU 0 and raise priorities
        invoke  SetProcessAffinityMask, SELF_PROCESS, 1                         ; to avoid CPU migration
        invoke  SetPriorityClass, SELF_PROCESS, REALTIME_PRIORITY_CLASS         ; available only when running with administrator rights
        invoke  SetThreadPriority, SELF_THREAD, THREAD_PRIORITY_TIME_CRITICAL

        ; Detect TSC features (all messages enabled) and calibrate the measurement overhead
        stdcall SpeedTestInit, -1

        ; Measure every sample procedure
        run_speed_test test1, 'Testing 1...'
        run_speed_test test2, 'Testing 2...'
        run_speed_test test3, 'Testing 3...'
        run_speed_test test4, 'Testing 4...'

        ; Drop back to normal scheduling before waiting for the user
        invoke  SetThreadPriority, SELF_THREAD, THREAD_PRIORITY_NORMAL
        invoke  SetPriorityClass, SELF_PROCESS, NORMAL_PRIORITY_CLASS

        cinvoke printf, 'Press a key to exit...'
        cinvoke getch
        invoke  ExitProcess, 0
    endf
; Sample procedures handed to SpeedTestMsg by the entry code. Each one is entered with a
; plain call and must end with ret. Their instruction sequences are the thing being
; measured, so they are intentionally left exactly as written.

; test1: empty body - just returns.
align 16 | |
test1: ; Procedure under test 1 | |
ret | |
; test2: a single CPUID with eax = 0.
; NOTE(review): cpuid overwrites eax, ebx, ecx and edx, and rbx is callee-saved in the
; Microsoft x64 convention; SpeedTest keeps its own state in rsi, rdi and r12-r15 only.
align 16 | |
test2: ; Procedure under test 2 | |
xor eax,eax | |
cpuid | |
ret | |
; test3: counts ecx down from 65536 to 0 with a dec/jnz pair.
align 16 | |
test3: ; Procedure under test 3 | |
mov ecx,65536 | |
@@: dec ecx | |
jnz @B | |
ret | |
; test4: the same 65536-step countdown, done by a LOOP instruction that branches to itself
; ($ is the address of the current instruction).
align 16 | |
test4: ; Procedure under test 4 | |
mov ecx,65536 | |
loop $ | |
ret | |
;-- SPEED TEST PROCEDURES ---------------------------------------------------------------------------------------------- | |
; Tunables of the measurement loop in SpeedTest.
;   SPEEDTEST_REPEATS - number of timed executions per measurement (must be a power of two!!!)
;   SPEEDTEST_WARMUPS - number of untimed warming-up executions that precede them, derived from SPEEDTEST_REPEATS
SPEEDTEST_REPEATS = 4096
SPEEDTEST_WARMUPS = 1 shl (bsr SPEEDTEST_REPEATS / 2)

; Assembly-time sanity checks, one condition per assert so a failure points at the broken rule
assert SPEEDTEST_REPEATS > 0
assert bsf SPEEDTEST_REPEATS = bsr SPEEDTEST_REPEATS    ; lowest set bit = highest set bit <=> power of two
assert SPEEDTEST_WARMUPS >= 0
; Initialize speed-test: detect RDTSCP / invariant TSC support, select the TSC read procedure,
; show messages if needed (via printf) and calibrate the measurement overhead.
; Parameters: ecx = show message flags: bit 0 - when rdtscp is NOT supported,
;                                       bit 1 - when invariant TSC is NOT supported,
;                                       bit 2 - when everything's ok (ecx = -1 - all messages)
; Returns: rax = unsupported feature flags: bit 0 - rdtscp is NOT supported,
;                                           bit 1 - invariant TSC is NOT supported
;          (rax = 0 - both features are supported)
; Side effects: may store SpeedTestRDTSCP into [SpeedTestGetTSC]; writes [SpeedTestOverhead].
; Register use: rbx is preserved (cpuid overwrites it and bl carries the message mask across printf).
proc SpeedTestInit uses rbx, MsgFlags
    frame
        mov     r8b,3                           ; r8b = result mask, start from "nothing is supported"
        mov     r9d,ecx                         ; r9d = show message flags (cpuid does not touch r8-r10)
        mov     eax,0x80000000
        cpuid
        mov     r10d,eax                        ; r10d = max extended cpuid leaf level

        ; RDTSCP instruction support check
        mov     eax,0x80000001
        cmp     r10d,eax
        jb      .features_done                  ; leaf is absent: both features are NOT supported
        cpuid
        bt      edx,27                          ; rdtscp support bit
        jnc     .check_invariant
        and     r8b,not 1                       ; mark as supported
        mov     [SpeedTestGetTSC],SpeedTestRDTSCP ; use RDTSCP instruction from now on

.check_invariant:
        ; Invariant TSC support check
        mov     eax,0x80000007
        cmp     r10d,eax
        jb      .features_done
        cpuid
        bt      edx,8                           ; invariant TSC support bit
        jnc     .features_done
        and     r8b,not 2                       ; mark as supported

.features_done:
        ; Park the result mask in this procedure's own parameter home slot: unlike a register
        ; it cannot be disturbed by anything the calls below do.
        movzx   eax,r8b
        mov     [MsgFlags],rax
        mov     bl,al                           ; bl = result mask -> message mask
        test    bl,bl
        setz    cl
        shl     cl,2
        or      bl,cl                           ; set bit 2 in bl if both features are supported
        and     bl,r9b                          ; keep only the messages the caller asked for

        ; Messages
        test    bl,1
        jz      .skip_rdtscp_msg
        cinvoke printf, <"Warning: RDTSCP instruction is not supported, RDTSC will be used instead (CPU migration can't be detected)!", 10>
.skip_rdtscp_msg:
        test    bl,2
        jz      .skip_invariant_msg
        cinvoke printf, <"Warning: invariant TSC is not supported (results may be inaccurate)!", 10>
.skip_invariant_msg:
        test    bl,4
        jz      .skip_success_msg
        cinvoke printf, <"Success: both RDTSCP instruction and invariant TSC are supported.", 10>
.skip_success_msg:

        ; Measure overhead: time an empty procedure with the correction zeroed, then store
        ; the outcome as the correction for all later measurements
        xor     eax,eax
        mov     [SpeedTestOverhead],rax
        stdcall SpeedTest, SpeedTestEmptyFunc   ; overhead test
        mov     [SpeedTestOverhead],rax
    endf
        ; The call frame must be released (endf) before ret: the epilogue generated for
        ; "uses rbx" pops rbx from the current top of stack, so rsp has to be back where
        ; the prologue left it.
        mov     rax,[MsgFlags]                  ; results
        ret
endp ; SpeedTestInit
; Measure procedure speed and show messages (via printf)
; Parameters:
; * rcx = procedure address;
; * rdx = starting message address (0 - no message, -1 - 'Testing...' message);
; * r8 = result message address (0 - no message, -1 - just a number of ticks and new line), must contain '%llu'
;        for result TSC count and then '%s' (optional) for CPU migration message (specified by r9);
; * r9 = CPU migration message address (optional, must be used only if r8 message contains '%s').
; Returns: rax = TSC count (never negative), zf = 1 if no CPU migration has occurred
; Notes:
; * The parameter home slots are reused as scratch storage: [ProcAddr] later holds the tick count and
;   the low byte of [PreMsg] holds the saved zf.
; * This procedure has no 'uses' list, and ret is deliberately placed inside the frame: leaving the frame
;   with endf first would adjust rsp with an add and destroy the zf that is being returned.
proc SpeedTestMsg ProcAddr, PreMsg, ResultMsg, MigMsg
  SpeedTestMsg% = 0 ; turn off parameter count check
    frame
        mov     [ProcAddr],rcx
        mov     [ResultMsg],r8
        mov     [MigMsg],r9

        ; Starting message
        test    rdx,rdx
        jz      .no_start
        cmp     rdx,-1
        jne     .print_start
        mov     rdx,.testing_msg                ; -1 selects the built-in text
.print_start:
        cinvoke printf, '%s', rdx
.no_start:

        ; Test speed
        stdcall SpeedTest, [ProcAddr]
        mov     [ProcAddr],rax                  ; keep the tick count (mov leaves flags intact)
        setz    byte [PreMsg]                   ; save zf: 1 = no migration, 0 = migration

        ; Result message: '%s' receives an empty string unless a migration was detected
        mov     r8,.no_message
        jz      .migration_text_ready           ; still the zf returned by SpeedTest
        mov     r8,[MigMsg]
.migration_text_ready:
        mov     rcx,[ResultMsg]
        test    rcx,rcx
        jz      .no_results
        cmp     rcx,-1
        jne     .print_result
        mov     rcx,.just_ticks                 ; -1 selects the built-in format
.print_result:
        cinvoke printf, rcx, rax, r8
.no_results:

        ; Return values
        mov     rax,[ProcAddr]
        dec     byte [PreMsg]                   ; restore zf: 1-1 = 0 -> zf = 1, 0-1 = 0xFF -> zf = 0
        ret
    endf

  .testing_msg  db 'Testing...',0
  .just_ticks   db ' %llu',10,0                 ; own terminator: must not depend on what is placed after it
  .no_message   db 0
endp ; SpeedTestMsg
; Measure procedure speed
; Parameters: rcx = procedure address
; Returns: rax = TSC count (never negative), zf = 1 if no CPU migration has occurred
; Method: the procedure is called SPEEDTEST_WARMUPS times untimed, then SPEEDTEST_REPEATS times with the TSC
;         read before and after each call. Each difference minus [SpeedTestOverhead] is stored in SpeedTestResults.
;         If more than two samples exist they are sorted and only the middle 50% is averaged.
; Register use (all listed in 'uses', so they are restored for the caller):
;   r12 = procedure address, r13 = starting TSC, r14d = starting CPU id, r15d = accumulated migration flag,
;   esi = loop counter, rdi = write pointer into SpeedTestResults, rbx = overwritten by the RDTSC-based reader.
proc SpeedTest uses rbx rsi rdi r12 r13 r14 r15, ProcAddr
    frame
        mov     r12,rcx

        ; Warming-up calls
    if SPEEDTEST_WARMUPS > 0
        mov     esi,SPEEDTEST_WARMUPS
.warmup:
        stdcall r12
        dec     esi
        jnz     .warmup
    end if

        ; Main tests
        cld
        mov     rdi,SpeedTestResults
        xor     r15d,r15d
        mov     esi,SPEEDTEST_REPEATS
        align 16
.measure:
;       invoke  SwitchToThread                  ; try to update thread time slice
        invoke  SpeedTestGetTSC                 ; get ticks in rax, CPU id in ecx
        mov     r13,rax
        mov     r14d,ecx
        stdcall r12                             ; main call
        invoke  SpeedTestGetTSC                 ; get ticks in rax, CPU id in ecx
        sub     rax,r13
        sub     rax,[SpeedTestOverhead]         ; result TSC count
        stosq                                   ; store to SpeedTestResults
        sub     ecx,r14d                        ; non-zero if the CPU id changed during the call
        or      r15d,ecx                        ; migration flag for all tests
        dec     esi
        jnz     .measure

    if SPEEDTEST_REPEATS > 2
        ; Sort results
        mov     rcx,SpeedTestResults
        mov     rdx,SPEEDTEST_REPEATS
        stdcall InsertionSort64
        ; use only 50% of results from array middle (assuming that 25% at the start and end are errors)
        mov     ecx,SPEEDTEST_REPEATS/2
    else
        mov     ecx,SPEEDTEST_REPEATS
    end if
    endf
        ; No calls below this point. The frame is closed here, before the epilogue, because
        ; ret pops the seven saved registers from the current top of stack; with the call
        ; frame still allocated they would be reloaded from the wrong slots.

        ; Calculate average CPU ticks
        xor     eax,eax
        xor     edx,edx
.sum:
        add     rax,[SpeedTestResults+(SPEEDTEST_REPEATS/4)*8 + rdx*8] ; sum of all relevant results
        inc     edx
        dec     ecx
        jnz     .sum                            ; leaves rcx = 0
    if SPEEDTEST_REPEATS > 2
        sar     rax,bsr (SPEEDTEST_REPEATS/2)   ; average value
    else if SPEEDTEST_REPEATS = 2
        sar     rax,bsr SPEEDTEST_REPEATS       ; average value
    end if
        test    rax,rax                         ; explicit sign test, valid for every SPEEDTEST_REPEATS
        cmovs   rax,rcx                         ; zero result if negative (64-bit form: a 32-bit cmov
                                                ; would clear bits 63..32 of rax even when not taken)

        ; Flags are produced last: the pop/leave/retn sequence of the epilogue does not modify them
        test    r15d,r15d                       ; zf = 1 if no CPU migration has occurred
        ret
endp ; SpeedTest
; Read TSC via RDTSC [for internal use] | |
; Returns: rax = current TSC counter value, ecx = 0 (processor id detection is not supported) | |
; Changes ebx !!! | |
; This is the initial value of the SpeedTestGetTSC pointer; SpeedTestInit replaces it with
; SpeedTestRDTSCP when the CPU reports RDTSCP support.
; The cpuid below overwrites eax, ebx, ecx and edx, hence the ebx warning above; SpeedTest,
; which calls through the pointer, lists rbx in its 'uses' clause.
; The instruction order is part of the measurement, do not rearrange it.
if used SpeedTestRDTSC | |
SpeedTestRDTSC: | |
xor eax,eax ; cpuid execution time may vary depending on eax value | |
cpuid ; serialization | |
xor ecx,ecx ; processor id (not supported) | |
rdtsc | |
; rdtsc delivers the counter as edx:eax - merge both halves into rax
shl rdx,32 | |
or rax,rdx | |
mfence | |
ret | |
end if ; used SpeedTestRDTSC | |
; Read TSC via RDTSCP [for internal use] | |
; Returns: rax = current TSC counter value, ecx = processor id | |
; SpeedTestInit stores this procedure into SpeedTestGetTSC when cpuid reports RDTSCP support.
; SpeedTest compares the ecx values of two reads to detect CPU migration.
; The instruction order is part of the measurement, do not rearrange it.
if used SpeedTestRDTSCP | |
SpeedTestRDTSCP: | |
rdtscp | |
; rdtscp delivers the counter as edx:eax (and the processor id in ecx) - merge the halves into rax
shl rdx,32 | |
or rax,rdx | |
mfence | |
; SpeedTestEmptyFunc shares the ret below: it is the do-nothing procedure that SpeedTestInit
; times to obtain the measurement overhead. It is assembled only together with SpeedTestRDTSCP.
SpeedTestEmptyFunc: | |
ret | |
end if ; used SpeedTestRDTSCP | |
if used InsertionSort64
; Insertion sort of 64-bit elements (ascending, elements compared as signed values, in place)
; Parameters: rcx = array address, rdx = number of elements
; Changes: rax, r8, r9, r10, flags
; Register roles: r8 = index of the key being inserted, rax = key,
;                 r9 = index of the element currently compared with the key, r10 = that element
InsertionSort64:
        cmp     rdx,1
        jle     .done                           ; nothing to do if number of elements <= 1
        mov     r8d,1                           ; the first key is element 1
.next_key:
        mov     rax,[rcx+r8*8]                  ; key = a[key_index]
        lea     r9,[r8-1]                       ; start comparing with the element just before it
.shift:
        mov     r10,[rcx+r9*8]                  ; prev_el
        cmp     r10,rax
        jle     .place                          ; prev_el <= key: the key belongs right after it
        mov     [rcx+r9*8+8],r10                ; prev_el > key: move prev_el one slot up
        sub     r9,1
        jnc     .shift                          ; go on while an element before it exists (no borrow)
.place:
        mov     [rcx+r9*8+8],rax                ; drop the key into the freed slot
        add     r8,1                            ; ++key_index
        cmp     r8,rdx
        jb      .next_key                       ; repeat if key_index < number of elements
.done:
        ret
end if ; used InsertionSort64
;-- DATA SECTION ------------------------------------------------------------------------------------------------------- | |
.data
if used SpeedTestInit
        align 16
  ; Pointer to the procedure that reads the TSC; starts as the RDTSC-based reader and is
  ; switched to SpeedTestRDTSCP by SpeedTestInit when RDTSCP is available
  SpeedTestGetTSC       dq SpeedTestRDTSC
  ; Tick count subtracted from every measurement (set by SpeedTestInit)
  SpeedTestOverhead     dq ?
  ; One slot per timed execution, filled and then sorted by SpeedTest
  SpeedTestResults      dq SPEEDTEST_REPEATS dup (?)
end if ; used SpeedTestInit
;-- REQUIRE ADMIN RIGHTS ----------------------------------------------------------------------------------------------- | |
; Emitted only when REQUIRE_ADMIN_RIGHTS is defined as 1: adds a '.rsrc' section holding a single
; RT_MANIFEST resource (id 1, language-neutral). The XML it contains requests the
; "requireAdministrator" execution level for the program.
match =1, REQUIRE_ADMIN_RIGHTS | |
{ | |
section '.rsrc' data readable resource | |
directory RT_MANIFEST, manifest | |
resource manifest, 1, LANG_NEUTRAL, require_admin_rights | |
resdata require_admin_rights | |
; The manifest text is stored exactly as written below, split over several db lines
db '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>' | |
db '<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">' | |
db '<assemblyIdentity version="1.0.0.0" name="." type="win32"/>' | |
db '<trustInfo xmlns="urn:schemas-microsoft-com:asm.v2"><security><requestedPrivileges>' | |
db '<requestedExecutionLevel level="requireAdministrator" uiAccess="false"/>' | |
db '</requestedPrivileges></security></trustInfo></assembly>' | |
endres | |
} | |
;-- IMPORT SECTION ----------------------------------------------------------------------------------------------------- | |
section '.idata' import data readable | |
library kernel32, 'kernel32.dll',\ | |
msvcrt, 'msvcrt.dll' | |
import_kernel32 | |
all_api | |
import msvcrt,\ | |
printf, 'printf',\ | |
getch, '_getch' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment