Skip to content

Instantly share code, notes, and snippets.

Last active June 22, 2022 09:53
Show Gist options
  • Save niklasf/a33a7405f88fbc7a27f0bd6f33a73678 to your computer and use it in GitHub Desktop.
Save niklasf/a33a7405f88fbc7a27f0bd6f33a73678 to your computer and use it in GitHub Desktop.
Detect support for popcnt and pext instructions on amd64, x86-64, x86 and i686 CPUs
import ctypes
import contextlib
import os
import platform
def detect_cpu_capabilities():
    """Detect support for the popcnt and pext instructions.

    Walks the CPUID leaves exposed by ``make_cpuid()`` and inspects two
    feature bits:

    * leaf 1, ECX bit 23 -> popcnt
    * leaf 7, EBX bit 8  -> pext

    Returns:
        A ``(modern, bmi2)`` tuple of booleans. ``modern`` is True when the
        popcnt bit is set and ``bmi2`` is True when the pext bit is set.

    An ``OSError`` raised while setting up or running CPUID (for example on
    an unsupported machine type) is swallowed on purpose: detection is best
    effort and the flags gathered so far (False by default) are returned.
    """
    modern, bmi2 = False, False

    try:
        with make_cpuid() as cpuid:
            # Each range start is queried first; EAX of that query is used
            # as the highest leaf number of the range.
            for first_leaf in (0x0, 0x80000000):
                highest, _, _, _ = cpuid(first_leaf)
                for leaf in range(first_leaf, highest + 1):
                    _, ebx, ecx, _ = cpuid(leaf)

                    # popcnt
                    if leaf == 1 and ecx & (1 << 23):
                        modern = True

                    # pext
                    if leaf == 7 and ebx & (1 << 8):
                        bmi2 = True
    except OSError:
        # Best effort: report whatever was detected before the failure.
        pass

    return modern, bmi2
@contextlib.contextmanager
def make_cpuid():
    """Context manager yielding a ``cpuid(eax)`` callable.

    A small machine-code stub that executes the CPUID instruction (with ECX
    zeroed) is copied into freshly allocated read/write/execute memory and
    wrapped in a ctypes function pointer. The memory is released when the
    ``with`` block exits, including when it exits through an exception.

    Yields:
        A function ``cpuid(eax)`` returning the tuple
        ``(eax, ebx, ecx, edx)`` of unsigned 32-bit register values. All
        calls share one result buffer, and the function must not be called
        after the ``with`` block has exited.

    Raises:
        OSError: ``platform.machine()`` is not one of amd64, x86_64, x86 or
            i686, or ``mprotect`` refused to make the buffer executable.
        MemoryError: the code buffer could not be allocated.
    """
    # Loosely based on code by Anders Høst, licensed MIT.

    # Prepare system information
    is_windows = os.name == "nt"
    is_64bit = ctypes.sizeof(ctypes.c_void_p) == 8
    if platform.machine().lower() not in ["amd64", "x86_64", "x86", "i686"]:
        raise OSError("Got no CPUID opcodes for %s" % platform.machine())

    # Struct for return value
    class CPUID_struct(ctypes.Structure):
        _fields_ = [("eax", ctypes.c_uint32),
                    ("ebx", ctypes.c_uint32),
                    ("ecx", ctypes.c_uint32),
                    ("edx", ctypes.c_uint32)]

    # Select kernel32 or libc
    if is_windows:
        if is_64bit:
            libc = ctypes.CDLL("kernel32.dll")
        else:
            libc = ctypes.windll.kernel32
    else:
        libc = ctypes.pythonapi

    # Select opcodes
    if is_64bit:
        if is_windows:
            # Windows x86_64
            # Two first call registers : RCX, RDX
            # Volatile registers       : RAX, RCX, RDX, R8-11
            opc = [
                0x53,                    # push   %rbx
                0x48, 0x89, 0xd0,        # mov    %rdx,%rax
                0x49, 0x89, 0xc8,        # mov    %rcx,%r8
                0x31, 0xc9,              # xor    %ecx,%ecx
                0x0f, 0xa2,              # cpuid
                0x41, 0x89, 0x00,        # mov    %eax,(%r8)
                0x41, 0x89, 0x58, 0x04,  # mov    %ebx,0x4(%r8)
                0x41, 0x89, 0x48, 0x08,  # mov    %ecx,0x8(%r8)
                0x41, 0x89, 0x50, 0x0c,  # mov    %edx,0xc(%r8)
                0x5b,                    # pop    %rbx
                0xc3,                    # retq
            ]
        else:
            # Posix x86_64
            # Two first call registers : RDI, RSI
            # Volatile registers       : RAX, RCX, RDX, RSI, RDI, R8-11
            opc = [
                0x53,                    # push   %rbx
                0x48, 0x89, 0xf0,        # mov    %rsi,%rax
                0x31, 0xc9,              # xor    %ecx,%ecx
                0x0f, 0xa2,              # cpuid
                0x89, 0x07,              # mov    %eax,(%rdi)
                0x89, 0x5f, 0x04,        # mov    %ebx,0x4(%rdi)
                0x89, 0x4f, 0x08,        # mov    %ecx,0x8(%rdi)
                0x89, 0x57, 0x0c,        # mov    %edx,0xc(%rdi)
                0x5b,                    # pop    %rbx
                0xc3,                    # retq
            ]
    else:
        # CDECL 32 bit
        # Two first call registers : Stack (%esp)
        # Volatile registers       : EAX, ECX, EDX
        opc = [
            0x53,                    # push   %ebx
            0x57,                    # push   %edi
            0x8b, 0x7c, 0x24, 0x0c,  # mov    0xc(%esp),%edi
            0x8b, 0x44, 0x24, 0x10,  # mov    0x10(%esp),%eax
            0x31, 0xc9,              # xor    %ecx,%ecx
            0x0f, 0xa2,              # cpuid
            0x89, 0x07,              # mov    %eax,(%edi)
            0x89, 0x5f, 0x04,        # mov    %ebx,0x4(%edi)
            0x89, 0x4f, 0x08,        # mov    %ecx,0x8(%edi)
            0x89, 0x57, 0x0c,        # mov    %edx,0xc(%edi)
            0x5f,                    # pop    %edi
            0x5b,                    # pop    %ebx
            0xc3,                    # ret
        ]

    code_size = len(opc)
    code = (ctypes.c_ubyte * code_size)(*opc)

    if is_windows:
        # Declare prototypes explicitly: without a pointer-sized restype
        # ctypes treats the returned address as a C int, which can truncate
        # it on 64-bit Windows.
        libc.VirtualAlloc.restype = ctypes.c_void_p
        libc.VirtualAlloc.argtypes = [ctypes.c_void_p, ctypes.c_size_t,
                                      ctypes.c_ulong, ctypes.c_ulong]
        libc.VirtualFree.restype = ctypes.c_int
        libc.VirtualFree.argtypes = [ctypes.c_void_p, ctypes.c_size_t,
                                     ctypes.c_ulong]

        # Allocate executable memory (MEM_COMMIT, PAGE_EXECUTE_READWRITE)
        addr = libc.VirtualAlloc(None, code_size, 0x1000, 0x40)
        if not addr:
            raise MemoryError("Could not VirtualAlloc RWX memory")
    else:
        # Allocate memory
        libc.valloc.restype = ctypes.c_void_p
        libc.valloc.argtypes = [ctypes.c_size_t]
        libc.free.restype = None
        libc.free.argtypes = [ctypes.c_void_p]
        addr = libc.valloc(code_size)
        if not addr:
            raise MemoryError("Could not valloc memory")

    try:
        if not is_windows:
            # Make executable (PROT_READ | PROT_WRITE | PROT_EXEC)
            libc.mprotect.restype = ctypes.c_int
            libc.mprotect.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int]
            if 0 != libc.mprotect(addr, code_size, 1 | 2 | 4):
                raise OSError("Failed to set RWX using mprotect")

        # Copy code to allocated executable memory. No need to flush
        # instruction cache for CPUID.
        ctypes.memmove(addr, code, code_size)

        # Create and yield callable
        result = CPUID_struct()
        func_type = ctypes.CFUNCTYPE(None, ctypes.POINTER(CPUID_struct), ctypes.c_uint32)
        func_ptr = func_type(addr)

        def cpuid(eax):
            func_ptr(result, eax)
            return result.eax, result.ebx, result.ecx, result.edx

        yield cpuid
    finally:
        # Free the code buffer even when setup or the with-body raised.
        if is_windows:
            libc.VirtualFree(addr, 0, 0x8000)  # MEM_RELEASE
        else:
            libc.free(addr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment