# NicolasT/cpuid.py

## cpuid.py
# Some code to retrieve CPUID information in pure Python
#
# Copyright (C) 2009 Nicolas Trangez  <eikke eikke com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation, version 2.1
# of the License.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301  USA

import os
import ctypes
import itertools

# Current code only runs in 32bit executables: the opcode streams assembled
# below are 32bit x86 machine code.
# This is an explicit check instead of an assert statement so the guard is not
# stripped when Python runs with -O. AssertionError is kept as exception type,
# so nothing changes for callers in non-optimized mode.
if ctypes.sizeof(ctypes.c_void_p) != 4:
    raise AssertionError('Only 32bit supported')

__author__ = 'Nicolas Trangez'
__version__ = 0, 0, 1

# TODO
# ====
# - x86_64 support?
# - Fix opcode constant naming, it's a mess
#
# Done:
# - Add function to check whether CPUID is supported on the system
#   (see check_cpuid)

# Integer value of a C NULL pointer
NULL = 0x0

# Retrieve handlers to some C functions
# These lookups fail if the functions are not found, so it's some sanity
# checking at import time as well
def _check_not_null(name, result, func, arguments):
    '''ctypes errcheck helper for C functions which return a pointer

    ctypes hands a NULL ``c_void_p`` return value to Python as ``None``, not
    as the integer 0, so every false value is treated as a NULL pointer.

    :param name: name of the C function, used in the error message
    :type name: str
    :param result: value returned by the C function
    :type result: number or None
    :param func: the foreign function object (unused)
    :param arguments: arguments the function was called with (unused)

    :return: `result`, unchanged
    :raise RuntimeError: the C function returned NULL
    '''
    if not result:
        raise RuntimeError('Call to %s returned NULL' % name)

    return result

def _check_zero(name, result, func, arguments):
    '''ctypes errcheck helper for C functions which return 0 on success

    :param name: name of the C function, used in the error message
    :type name: str
    :param result: value returned by the C function
    :type result: number
    :param func: the foreign function object (unused)
    :param arguments: arguments the function was called with (unused)

    :return: `result`, unchanged
    :raise RuntimeError: the C function returned a non-zero value
    '''
    if result == 0:
        return result

    raise RuntimeError('Call to %s returned %d' % (name, result))

def _libc_function(symbol, restype, argtypes, checker=None):
    '''Look up a C function through ctypes.pythonapi and set its prototype

    :param symbol: name of the C function
    :type symbol: str
    :param restype: ctypes return type, or None for void
    :param argtypes: ctypes argument types
    :type argtypes: tuple
    :param checker: optional result checker, called as
                    checker(symbol, result, func, arguments)
    :type checker: callable

    :return: the configured foreign function
    '''
    function = getattr(ctypes.pythonapi, symbol)
    function.restype = restype
    function.argtypes = argtypes
    if checker is not None:
        # ctypes calls errcheck with (result, func, arguments)
        function.errcheck = lambda *a: checker(symbol, *a)

    return function

py_valloc = _libc_function(
    'valloc', ctypes.c_void_p, (ctypes.c_size_t, ), _check_not_null)

py_free = _libc_function('free', None, (ctypes.c_void_p, ))

py_memmove = _libc_function(
    'memmove', ctypes.c_void_p,
    (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ), _check_not_null)

py_mprotect = _libc_function(
    'mprotect', ctypes.c_int,
    (ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int), _check_zero)

# Protection flags for mprotect(). These values are hard-coded, let's hope
# they are the same on all target platforms
PROT_NONE = 0
PROT_READ = 1
PROT_WRITE = 2
PROT_EXEC = 4


class X86Function(object):
    '''A function created from raw X86 CPU opcodes

    The opcodes are copied into page-aligned memory obtained from valloc(),
    the memory is made read/execute-only using mprotect(), and a ctypes
    function pointer with the requested prototype is created on top of it.
    '''
    __slots__ = '_page', '_size', '_fun',

    def __init__(self, opcodes, prototype):
        '''Initialize the function

        :param opcodes: instruction stream to execute
        :type opcodes: iterable<number>
        :param prototype: function prototype as passed to ctypes.CFUNCTYPE
        :type prototype: iterable

        :raise AssertionError: an opcode does not fit in a single byte
        :raise RuntimeError: allocating or protecting the memory failed
        '''
        # Set every slot before anything can fail: slots have no default
        # value, and __del__ also runs for an instance whose __init__ raised.
        # The page is initialized with NULL, not None, so the check later on
        # can be C-style (on NULL)
        self._page = NULL
        # Size in bytes of the allocation referenced by _page
        self._size = 0
        # Reference to the ctypes function we create
        self._fun = None

        # Materialize the stream once, so a one-shot iterator is not used up
        # by the validation below
        opcodes = tuple(opcodes)

        # Basic input validation. Note 0xff is a valid byte value.
        assert all(0 <= i <= 0xff for i in opcodes)

        # Turn the instruction stream into a C-string. bytearray yields a
        # byte string on Python 2 as well as on Python 3.
        opcode_bytes = ctypes.create_string_buffer(bytes(bytearray(opcodes)))
        # Calculate instruction stream length. This includes the ending \0
        # added by create_string_buffer; it is copied along but never executed
        # as long as the stream returns.
        opcode_size = len(opcode_bytes)

        # We need to allocate a multiple of the pagesize, otherwise 64bit
        # systems seem to be rather unhappy: calls to mprotect() apply to
        # whole pages. If we don't allocate a multiple of pagesize, then call
        # mprotect, we might protect pages which might later be allocated by
        # the memory allocator (since they're free), but accessing them will
        # fail because of the protection. This code makes sure we allocate a
        # correct number of bytes to allocate all pages we need.
        # Whilst on 32bit systems the old approach didn't fail, on 64bit
        # systems segmentation faults occur at every run.
        pagesize = os.sysconf('SC_PAGESIZE')
        pages, extra = divmod(opcode_size, pagesize)
        if extra:
            pages += 1
        allocation_size = pages * pagesize

        # Little sanity check
        assert allocation_size >= opcode_size

        # Allocate memory to keep our instruction stream
        # Note we can't use malloc (well, we could, but then we need to perform
        # some tricks later on) because on some systems, functions need to be
        # page-aligned. valloc provides page-aligned memory for free.
        page = py_valloc(allocation_size)
        if not page:
            # ctypes reports a NULL pointer as None
            raise RuntimeError('Call to valloc returned NULL')

        # Keep a reference to the allocated memory right away, so __del__
        # releases it even if one of the steps below fails
        self._page = page
        self._size = allocation_size

        # Move instruction stream to the newly allocated space
        py_memmove(page, opcode_bytes, opcode_size)
        # Writing done, set only READ and EXEC permissions on the page(s)
        py_mprotect(page, allocation_size, PROT_READ | PROT_EXEC)

        # Create a function trampoline to the page we just created, using the
        # given function prototype as prototype
        self._fun = ctypes.cast(page, ctypes.CFUNCTYPE(*prototype))

    def __del__(self):
        # __init__ might have failed before a page was allocated, and at
        # interpreter shutdown the module globals might be gone already
        page = getattr(self, '_page', NULL)
        if not page or not py_free:
            return

        # free() may write allocator bookkeeping into the block it gets back,
        # which faults as long as the page(s) are read/execute-only. Make them
        # writable again first; if that is not possible, leak the memory
        # instead of risking a crash.
        try:
            py_mprotect(page, self._size, PROT_READ | PROT_WRITE)
        except (RuntimeError, TypeError):
            return

        py_free(page)
        self._page = NULL

    def __call__(self, *args):
        '''Call the assembled function

        :param args: arguments, converted according to the prototype

        :return: whatever the function returns, according to the prototype
        '''
        # This is a simple trampoline
        return self._fun(*args)


# The X86 CPU opcodes needed for the cpuid routine
# Every constant is a tuple (an iterable), even when it holds a single byte:
# that's what our 'assembler' expects. 'NN' in a name stands for one operand
# byte which has to follow the opcode in the instruction stream.
PUSHL_EBP = (0x55, )                      # pushl %ebp
MOVL_ESP_EBP = (0x89, 0xe5, )             # movl %esp,%ebp
SUBL_NN_ESP = (0x83, 0xec, )              # subl $NN,%esp
MOVL_NN_EBP_EDX = (0x8b, 0x55, )          # movl NN(%ebp),%edx
MOVL_ESI_NN_EBP = (0x89, 0x75, )          # movl %esi,NN(%ebp)
MOVL_NN_EBP_EAX = (0x8b, 0x45, )          # movl NN(%ebp),%eax
MOVL_EDI_NN_EBP = (0x89, 0x7d, )          # movl %edi,NN(%ebp)
MOVL_NN_NN_NN_NN_NN_EDX = (0xc7, 0x42, )  # movl $imm32,NN(%edx)
MOVL_NN_NN_NN_NN_EDX = (0xc7, 0x02, )     # movl $imm32,(%edx)
PUSHL_EBX = (0x53, )                      # pushl %ebx
CPUID = (0x0f, 0xa2, )                    # cpuid
MOVL_EBX_ESI = (0x89, 0xde, )             # movl %ebx,%esi
POPL_EBX = (0x5b, )                       # popl %ebx
MOVL_EDX_EDI = (0x89, 0xd7, )             # movl %edx,%edi
MOVL_EAX_EDX = (0x89, 0x02, )             # movl %eax,(%edx)
MOVL_ESI_NN_EDX = (0x89, 0x72, )          # movl %esi,NN(%edx)
MOVL_ECX_NN_EDX = (0x89, 0x4a, )          # movl %ecx,NN(%edx)
MOVL_EDI_NN_EDX = (0x89, 0x7a, )          # movl %edi,NN(%edx)
MOVL_NN_EBP_ESI = (0x8b, 0x75, )          # movl NN(%ebp),%esi
MOVL_NN_EBP_EDI = (0x8b, 0x7d, )          # movl NN(%ebp),%edi
LEAVE = (0xc9, )                          # leave
RET = (0xc3, )                            # ret

# 'zero' uint32, encoded as an iterable of 4 0 bytes
UINT32_ZERO = (0x00, ) * 4

# Result of compiling the following function on Mac OSX 10.7.8 using
# i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5490)
# Command line: gcc -c -O3 -fPIC -o cpuid.o cpuid.c
#
# typedef unsigned int uint32_t;
#
# typedef struct _Regs {
#     uint32_t eax;
#     uint32_t ebx;
#     uint32_t ecx;
#     uint32_t edx;
# } Regs;
#
# void cpuid(uint32_t op, Regs *regs) {
#     regs->eax = regs->ebx = regs->ecx = regs->edx = 0;
#
#     asm volatile("pushl %%ebx      \n\t"
#                  "cpuid            \n\t"
#                  "movl %%ebx, %1   \n\t"
#                  "popl %%ebx       \n\t"
#                  : "=a"(regs->eax), "=r"(regs->ebx),
#                    "=c"(regs->ecx), "=d"(regs->edx)
#                  : "a"(op)
#                  : "cc");
# }
#
# Notice the assembly is less trivial than simply 'cpuid', because it's
# PIC-compatible (preserves the EBX register content).
#
# Opcodes dumped using otool and hexdump:
#
# $ otool -tv cpuid.o
# cpuid.o:
# (__TEXT,__text) section
# _cpuid:
# 00000000	pushl	%ebp
# 00000001	movl	%esp,%ebp
# 00000003	subl	$0x10,%esp
# 00000006	movl	0x0c(%ebp),%edx
# 00000009	movl	%esi,0xf8(%ebp)
# 0000000c	movl	0x08(%ebp),%eax
# 0000000f	movl	%edi,0xfc(%ebp)
# 00000012	movl	$0x00000000,0x0c(%edx)
# 00000019	movl	$0x00000000,0x08(%edx)
# 00000020	movl	$0x00000000,0x04(%edx)
# 00000027	movl	$0x00000000,(%edx)
# 0000002d	pushl	%ebx
# 0000002e	cpuid
# 00000030	movl	%ebx,%esi
# 00000032	popl	%ebx
# 00000033	movl	%edx,%edi
# 00000035	movl	0x0c(%ebp),%edx
# 00000038	movl	%eax,(%edx)
# 0000003a	movl	%esi,0x04(%edx)
# 0000003d	movl	%ecx,0x08(%edx)
# 00000040	movl	%edi,0x0c(%edx)
# 00000043	movl	0xf8(%ebp),%esi
# 00000046	movl	0xfc(%ebp),%edi
# 00000049	leave
# 0000004a	ret
#
# $ hexdump -s 0x100 cpuid.o
# We store this in a variable so the result of our 'assembler' can be checked
_CPUID_HEXDUMP = '''
0000100 55 89 e5 83 ec 10 8b 55 0c 89 75 f8 8b 45 08 89
0000110 7d fc c7 42 0c 00 00 00 00 c7 42 08 00 00 00 00
0000120 c7 42 04 00 00 00 00 c7 02 00 00 00 00 53 0f a2
0000130 89 de 5b 89 d7 8b 55 0c 89 02 89 72 04 89 4a 08
0000140 89 7a 0c 8b 75 f8 8b 7d fc c9 c3
'''

# The cpuid routine, 'assembled' by concatenating the opcode fragments (and
# their operand bytes) into one flat tuple of bytes. Every line below matches
# one instruction of the disassembly listed above.
CPUID_OPCODES = tuple(
    byte
    for fragment in (
        PUSHL_EBP,
        MOVL_ESP_EBP,
        SUBL_NN_ESP, (0x10, ),
        MOVL_NN_EBP_EDX, (0x0c, ),
        MOVL_ESI_NN_EBP, (0xf8, ),
        MOVL_NN_EBP_EAX, (0x08, ),
        MOVL_EDI_NN_EBP, (0xfc, ),
        MOVL_NN_NN_NN_NN_NN_EDX, (0x0c, ), UINT32_ZERO,
        MOVL_NN_NN_NN_NN_NN_EDX, (0x08, ), UINT32_ZERO,
        MOVL_NN_NN_NN_NN_NN_EDX, (0x04, ), UINT32_ZERO,
        MOVL_NN_NN_NN_NN_EDX, UINT32_ZERO,
        PUSHL_EBX,
        CPUID,
        MOVL_EBX_ESI,
        POPL_EBX,
        MOVL_EDX_EDI,
        MOVL_NN_EBP_EDX, (0x0c, ),
        MOVL_EAX_EDX,
        MOVL_ESI_NN_EDX, (0x04, ),
        MOVL_ECX_NN_EDX, (0x08, ),
        MOVL_EDI_NN_EDX, (0x0c, ),
        MOVL_NN_EBP_ESI, (0xf8, ),
        MOVL_NN_EBP_EDI, (0xfc, ),
        LEAVE,
        RET,
    )
    for byte in fragment)


# Little helper function
def flatmap(f, i):
    '''Lazy flatmap

    Apply `f` to every element of `i` and yield the items of each result, one
    after the other. Nothing is taken from `i`, and `f` is not called, before
    the values are actually requested, so `i` can be an infinite iterable.

    :param f: function to map, it should return an iterable
    :type f: callable
    :param i: iterable to flatmap
    :type i: iterable

    :return: iterator over the items of all results of `f`
    :rtype: iterator
    '''
    for element in i:
        for item in f(element):
            yield item

# Check assembly result
# This helper routine takes hexdump output and parses it into a tuple of
# instruction bytes
def parse_hexdump(output):
    '''Parse hexdump output into the bytes it lists

    Every non-blank line consists of whitespace-separated columns: an offset,
    which is dropped, followed by the bytes as hexadecimal numbers.

    :param output: output of hexdump
    :type output: str

    :return: the bytes, in order of appearance
    :rtype: tuple<number>

    :raise ValueError: a column after the first one is not a hex number
    '''
    return tuple(
        # Parse every hex string into an integer
        int(column, 16)
        for line in output.splitlines()
        # Skip (potential) empty lines
        if line.strip()
        # Drop first column, take all other columns
        for column in line.split()[1:])

# Assert our 'assembly' results in the same instruction stream as the one
# generated by GCC.
# I know we could, obviously, just hard-code the GCC-emitted bytes, but that
# is, IMO, less readable/clean etc.
# The reference dump is only needed for this check, so it is removed from the
# module namespace afterwards.
assert CPUID_OPCODES == parse_hexdump(_CPUID_HEXDUMP)
del _CPUID_HEXDUMP

class _REGS(ctypes.Structure):
    '''The four 32bit registers a CPUID call reports its result in

    The layout matches the Regs struct of the C source the cpuid routine was
    compiled from: fields are C-style packed by default, which is what we
    need.
    '''
    _fields_ = [
        ('eax', ctypes.c_uint32),
        ('ebx', ctypes.c_uint32),
        ('ecx', ctypes.c_uint32),
        ('edx', ctypes.c_uint32),
    ]

    def __iter__(self):
        '''Iterate over the values of EAX, EBX, ECX and EDX, in this order'''
        registers = (self.eax, self.ebx, self.ecx, self.edx, )
        return iter(registers)

    def __str__(self):
        '''Render the register values as a tuple of hex strings'''
        # Being able to print this is useful
        return repr(tuple(hex(value) for value in self))


# The assembled cpuid routine as a callable. Its prototype: no return value
# (None) and 2 arguments, an uint32 containing the CPUID table to retrieve
# and a pointer to a REGS structure which receives the result.
_cpuid = X86Function(
    opcodes=CPUID_OPCODES,
    prototype=(None, ctypes.c_uint32, ctypes.POINTER(_REGS)))
# The opcodes were copied by X86Function, no need to keep them around
del CPUID_OPCODES

def cpuid(eax):
    '''Execute a CPUID call

    The given value is loaded into EAX, the CPUID instruction is executed,
    and the content of EAX, EBX, ECX and EDX afterwards is returned.

    Note the assembled routine only loads EAX before executing CPUID, no
    other register is set up.

    :param eax: CPUID query identifier
    :type eax: number

    :return: register values, as (eax, ebx, ecx, edx)
    :rtype: tuple<number>
    '''
    assert eax >= 0

    result = _REGS()
    _cpuid(eax, ctypes.byref(result))

    # Iterating a _REGS structure yields EAX, EBX, ECX and EDX, in this order
    return tuple(result)


# Now we got 'cpuid' defined, let's get 'check_cpuid' working, which checks
# whether the CPU supports the cpuid call
#
# Here's some C code to achieve this. We split up the check in 2 parts: one in
# pure assembler, which is as limited as possible (this will be embedded in
# _check_cpuid), and one more to handle variable allocation etc, which will be
# wrapped in check_cpuid.
#
# #include <stdio.h>
#
# typedef unsigned int uint32_t;
#
# void _check_cpuid(uint32_t *f1, uint32_t *f2) {
#     asm volatile("pushfl                 \n\t"
#                  "popl %%eax             \n\t"
#                  "movl %%eax, %0         \n\t"
#                  "xorl $0x200000, %%eax  \n\t"
#                  "pushl %%eax            \n\t"
#                  "popfl                  \n\t"
#                  "pushfl                 \n\t"
#                  "popl %%eax             \n\t"
#                  "movl %%eax, %1         \n\t"
#                  "pushl %0               \n\t"
#                  "popfl                  \n\t"
#                  : "=r" (*f1),
#                    "=r" (*f2)
#                  :
#                  : "eax"
#     );
# }
#
# int check_cpuid() {
#     uint32_t fl1, fl2;
#
#     _check_cpuid(&fl1, &fl2);
#
#     fl1 &= 0x200000;
#     fl2 &= 0x200000;
#
#     if(fl1 == fl2)
#         return 0;
#     return 1;
# }
#
# int main() {
#     if(!check_cpuid()) {
#         printf("No CPUID support\n");
#         return 1;
#     }
#
#     printf("CPUID supported\n");
#
#     return 0;
# }
#
#
# Compiled on the same system and using the same arguments as above,
# disassembly of the _check_cpuid function yields
#
# __check_cpuid:
# 00000000	pushl	%ebp
# 00000001	movl	%esp,%ebp
# 00000003	subl	$0x08,%esp
# 00000006	pushfl
# 00000007	popl	%eax
# 00000008	movl	%eax,%edx
# 0000000a	xorl	$0x00200000,%eax
# 0000000f	pushl	%eax
# 00000010	popfl
# 00000011	pushfl
# 00000012	popl	%eax
# 00000013	movl	%eax,%ecx
# 00000015	pushl	%edx
# 00000016	popfl
# 00000017	movl	0x08(%ebp),%eax
# 0000001a	movl	%edx,(%eax)
# 0000001c	movl	0x0c(%ebp),%eax
# 0000001f	movl	%ecx,(%eax)
# 00000021	leave
# 00000022	ret
#
# Or, in hex format:
_CHECK_CPUID_HEXDUMP = '''
0000100 55 89 e5 83 ec 08 9c 58 89 c2 35 00 00 20 00 50
0000110 9d 9c 58 89 c1 52 9d 8b 45 08 89 10 8b 45 0c 89
0000120 08 c9 c3
'''

# Additional opcodes for the check_cpuid routine. As before, 'NN' in a name
# stands for one operand byte which has to follow the opcode.
PUSHFL = (0x9c, )                # pushfl
POPFL = (0x9d, )                 # popfl
POPL_EAX = (0x58, )              # popl %eax
XORL_NN_NN_NN_NN_EAX = (0x35, )  # xorl $imm32,%eax
PUSHL_EAX = (0x50, )             # pushl %eax
MOVL_EAX_ECX = (0x89, 0xc1, )    # movl %eax,%ecx
MOVL_EDX_EAX = (0x89, 0x10, )    # movl %edx,(%eax)
MOVL_ECX_EAX = (0x89, 0x08, )    # movl %ecx,(%eax)
PUSHL_EDX = (0x52, )             # pushl %edx
MOVL_EDX_NN_EBP = (0x89, 0x55, )
MOVL_ECX_NN_EBP = (0x89, 0x4d, )

# MOVL_EAX_EDX, which we used before, stores to the memory EDX points to:
#
# movl %eax,(%edx)
#
# whilst now we need a move into the register itself:
#
# movl %eax,%edx
MOVL_EAX_rEDX = (0x89, 0xc2, )

# The _check_cpuid routine, 'assembled' into one flat tuple of bytes. Every
# line below matches one instruction of the disassembly listed above.
CHECK_CPUID_OPCODES = tuple(
    byte
    for fragment in (
        PUSHL_EBP,
        MOVL_ESP_EBP,
        SUBL_NN_ESP, (0x08, ),
        PUSHFL,
        POPL_EAX,
        MOVL_EAX_rEDX,
        XORL_NN_NN_NN_NN_EAX, (0x00, 0x00, 0x20, 0x00, ),
        PUSHL_EAX,
        POPFL,
        PUSHFL,
        POPL_EAX,
        MOVL_EAX_ECX,
        PUSHL_EDX,
        POPFL,
        MOVL_NN_EBP_EAX, (0x08, ),
        MOVL_EDX_EAX,
        MOVL_NN_EBP_EAX, (0x0c, ),
        MOVL_ECX_EAX,
        LEAVE,
        RET,
    )
    for byte in fragment)

# Sanity check: our instruction stream should be the one GCC generated.
# Neither the reference dump nor its parser is needed afterwards.
assert CHECK_CPUID_OPCODES == parse_hexdump(_CHECK_CPUID_HEXDUMP)
del _CHECK_CPUID_HEXDUMP
del parse_hexdump

# The assembled routine as a callable: it returns nothing and takes 2
# pointers to an uint32
_check_cpuid = X86Function(
    opcodes=CHECK_CPUID_OPCODES,
    prototype=(None,
               ctypes.POINTER(ctypes.c_uint32),
               ctypes.POINTER(ctypes.c_uint32)))
del CHECK_CPUID_OPCODES

def check_cpuid():
    '''Check whether the CPU supports CPUID

    The assembled routine stores the flags register as it was found, and as
    it reads after an attempt to toggle bit 21 (0x200000). The bit can be
    changed if and only if the 2 stored values differ in this bit.

    :return: the CPU supports the CPUID instruction
    :rtype: bool
    '''
    before = ctypes.c_uint32()
    after = ctypes.c_uint32()

    _check_cpuid(ctypes.byref(before), ctypes.byref(after))

    # XOR leaves exactly the bits set in which both values differ
    return ((before.value ^ after.value) & 0x200000) != 0


def uint32_to_string(u):
    '''Convert 4 bytes stored in a 32bit unsigned integer into the string
    they represent

    The least significant byte becomes the first character. Bits above the
    lower 32 are ignored.

    :param u: uint32 to convert
    :type u: number

    :return: string value of the 4 characters stored in the uint32
    :rtype: str
    '''
    return ''.join(chr((u >> shift) & 0xff) for shift in (0, 8, 16, 24))

def get_vendor():
    '''Get the vendor name of the CPU

    :return: the 12 characters CPUID function 0 reports in EBX, EDX and ECX
    :rtype: str
    '''
    registers = cpuid(0)

    # cpuid returns (eax, ebx, ecx, edx). The name is stored in EBX, EDX, ECX
    # (see Wikipedia), so we can't use natural ordering here
    return ''.join(uint32_to_string(registers[index]) for index in (1, 3, 2))

def get_brand():
    '''Get the brand of the CPU

    The brand is assembled from the registers returned by CPUID functions
    0x80000002, 0x80000003 and 0x80000004, 4 characters per register.

    :return: brand string, up to (excluding) its null terminator
    :rtype: str
    '''
    characters = ''.join(
        uint32_to_string(register)
        for function in (0x80000002, 0x80000003, 0x80000004, )
        for register in cpuid(function))

    # Values define a null-terminated string, so we only take characters as
    # long they're not null. This needs to be done on the joined string: a
    # 4-character chunk never equals a single null character.
    return characters.split(chr(0), 1)[0]

def _check_vmx_intel():
    '''Check whether VMX support is available, on Intel CPUs

    :return: VMX supported on the system
    :rtype: bool'''
    # cpuid returns (eax, ebx, ecx, edx), we need ECX of CPUID function 1
    ecx = cpuid(1)[2]

    # VMX is supported when bit 5 is set
    return bool(ecx & (1 << 5))

def main():
    '''Print vendor and brand of the CPU and, on Intel CPUs, VMX support'''
    assert check_cpuid(), "CPUID not available on this system"

    vendor = get_vendor()

    # Single-argument print calls give the same output whether print is a
    # statement or a function
    print('CPU Information')
    print('===============')
    print('%s %s' % ('Vendor:', vendor))
    print('%s %s' % ('Brand:', get_brand()))

    if vendor == 'GenuineIntel':
        print('%s %s' % ('VMX supported:', _check_vmx_intel()))

if __name__ == '__main__':
    main()
	# Some code to retrieve CPUID information in pure Python
	#
	# Copyright (C) 2009 Nicolas Trangez <eikke eikke com>
	#
	# This library is free software; you can redistribute it and/or
	# modify it under the terms of the GNU Lesser General Public
	# License as published by the Free Software Foundation, version 2.1
	# of the License.
	#
	# This library is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public
	# License along with this library; if not, write to the Free Software
	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	# MA 02110-1301 USA

	import os
	import ctypes
	import itertools

	# Current code only runs in 32bit executables
	assert ctypes.sizeof(ctypes.c_void_p) == 4, 'Only 32bit supported'

	__author__ = 'Nicolas Trangez'
	__version__ = 0, 0, 1

	# TODO
	# ====
	# - Add function to check whether CPUID is supported on the system
	# - x86_64 support?
	# - Fix opcode constant naming, it's a mess

	NULL = 0x0

	# Retrieve handlers to some C functions
	# These lookups fail if the functions are not found, so it's some sanity
	# checking at import time as well
	def _check_not_null(name, result, func, arguments):
	if result == NULL:
	raise RuntimeError('Call to %s returned NULL' % name)

	return result

	def _check_zero(name, result, func, arguments):
	if result != 0:
	raise RuntimeError('Call to %s returned %d' % (name, result))

	return result

	py_valloc = ctypes.pythonapi.valloc
	py_valloc.restype = ctypes.c_void_p
	py_valloc.argtypes = (ctypes.c_size_t, )
	py_valloc.errcheck = lambda a: _check_not_null('valloc', a)

	py_free = ctypes.pythonapi.free
	py_free.restype = None
	py_free.argtypes = (ctypes.c_void_p, )

	py_memmove = ctypes.pythonapi.memmove
	py_memmove.restype = ctypes.c_void_p
	py_memmove.argtypes = (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, )
	py_memmove.errcheck = lambda a: _check_not_null('memmove', a)

	py_mprotect = ctypes.pythonapi.mprotect
	py_mprotect.restype = ctypes.c_int
	py_mprotect.argtypes = (ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int)
	py_mprotect.errcheck = lambda a: _check_zero('mprotect', a)

	# Some definitions we need for mprotect(). Let's hope they are the same on all
	# target platforms
	PROT_NONE, PROT_READ, PROT_WRITE, PROT_EXEC = 0, 1, 2, 4


	class X86Function(object):
	'''A function created from raw X86 CPU opcodes'''
	__slots__ = '_page', '_fun',

	def __init__(self, opcodes, prototype):
	'''Initialize the function

	:param opcodes: instruction stream to execute
	:type opcodes: iterable<number>
	:param prototype: function prototype as passed to ctypes.CFUNCTYPE
	:type prototype: iterable
	'''
	# Basic input validation
	assert all(0 <= i < 0xff for i in opcodes)

	# Reference to the page(s) of memory our opcodes are copied to
	# We don't initialize with None but with NULL so the check later on can
	# be C-style (on NULL)
	self._page = NULL
	# Reference to the ctypes function we create
	self._fun = None

	# Create a real instruction stream as Python string
	opcode_string = ''.join(map(chr, opcodes))
	# Turn into a C-string
	opcode_bytes = ctypes.create_string_buffer(opcode_string)
	# Calculate instruction stream length
	opcode_size = len(opcode_bytes) # This includes ending \0,
	# we might want to drop it

	# We need to allocate a multiple of the pagesize, otherwist 64bit
	# systems seem to be rather unhappy: calls to mprotect() apply to
	# whole pages. If we don't allocate a multiple of pagesize, then call
	# mprotect, we might protect pages which might later be allocated by
	# the memory allocator (since they're free), but accessing them will
	# fail because of the protection. This code makes sure we allocate a
	# correct number of bytes to allocate all pages we need.
	# Whilst on 32bit systems the old approach didn't fail, on 64bit
	# systems segmentation faults occur at every run.
	pagesize = os.sysconf('SC_PAGESIZE')
	pages, extra = divmod(opcode_size, pagesize)
	pages = pages if extra == 0 else pages + 1
	allocation_size = pages * pagesize

	# Little sanity check
	assert allocation_size >= opcode_size

	# Allocate memory to keep our instruction stream
	# Note we can't use malloc (well, we could, but then we need to perform
	# some tricks later on) because on some systems, functions need to be
	# page-alligned. valloc provides page-alligned memory for free.
	page = py_valloc(allocation_size)

	# Move instruction stream to the newly allocated space
	v = py_memmove(page, opcode_bytes, opcode_size)
	# Writing done, set only READ and EXEC permissions on the page(s)
	py_mprotect(page, allocation_size, PROT_READ \| PROT_EXEC)

	# Keep a reference to the allocated memory so we can free() in our
	# destructor
	self._page = page
	# Create a function trampoline to the page we just created, using the
	# given function prototype as prototype
	self._fun = ctypes.cast(page, ctypes.CFUNCTYPE(*prototype))

	def __del__(self):
	# If we got a reference to a page, try to free it nicely
	if self._page != NULL and py_free:
	py_free(self._page)
	self._page = NULL

	def __call__(self, *args):
	'''Call the assembled function'''
	# This is a simple trampoline
	return self._fun(*args)


	# Some X86 CPU opcodes we need
	# Note these should be iterables, all of them. That's how our 'assembler'
	# expects them.
	PUSHL_EBP = 0x55,
	MOVL_ESP_EBP = 0x89, 0xe5,
	SUBL_NN_ESP = 0x83, 0xec,
	MOVL_NN_EBP_EDX = 0x8b, 0x55,
	MOVL_ESI_NN_EBP = 0x89, 0x75,
	MOVL_NN_EBP_EAX = 0x8b, 0x45,
	MOVL_EDI_NN_EBP = 0x89, 0x7d,
	MOVL_NN_NN_NN_NN_NN_EDX = 0xc7, 0x42,
	MOVL_NN_NN_NN_NN_EDX = 0xc7, 0x02,
	PUSHL_EBX = 0x53,
	CPUID = 0x0f, 0xa2,
	MOVL_EBX_ESI = 0x89, 0xde,
	POPL_EBX = 0x5b,
	MOVL_EDX_EDI = 0x89, 0xd7,
	MOVL_EAX_EDX = 0x89, 0x02,
	MOVL_ESI_NN_EDX = 0x89, 0x72,
	MOVL_ECX_NN_EDX = 0x89, 0x4a,
	MOVL_EDI_NN_EDX = 0x89, 0x7a,
	MOVL_NN_EBP_ESI = 0x8b, 0x75,
	MOVL_NN_EBP_EDI = 0x8b, 0x7d,
	LEAVE = 0xc9,
	RET = 0xc3,

	# 'zero' uint32, encoded as an iterable of 4 0 bytes
	UINT32_ZERO = (0x00, 0x00, 0x00, 0x00)

	# Result of compiling the following function on Mac OSX 10.7.8 using
	# i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5490)
	# Command line: gcc -c -O3 -fPIC -o cpuid.o cpuid.c
	#
	# typedef unsigned int uint32_t;
	#
	# typedef struct _Regs {
	# uint32_t eax;
	# uint32_t ebx;
	# uint32_t ecx;
	# uint32_t edx;
	# } Regs;
	#
	# void cpuid(uint32_t op, Regs *regs) {
	# regs->eax = regs->ebx = regs->ecx = regs->edx = 0;
	#
	# asm volatile("pushl %%ebx \n\t"
	# "cpuid \n\t"
	# "movl %%ebx, %1 \n\t"
	# "popl %%ebx \n\t"
	# : "=a"(regs->eax), "=r"(regs->ebx),
	# "=c"(regs->ecx), "=d"(regs->edx)
	# : "a"(op)
	# : "cc");
	# }
	#
	# Notice the assembly is less trivial than simply 'cpuid', because it's
	# PIC-compatible (preserves the EBX register content).
	#
	# Opcodes dumped using otool and hexdump:
	#
	# $ otool -tv cpuid.o
	# cpuid.o:
	# (__TEXT,__text) section
	# _cpuid:
	# 00000000 pushl %ebp
	# 00000001 movl %esp,%ebp
	# 00000003 subl $0x10,%esp
	# 00000006 movl 0x0c(%ebp),%edx
	# 00000009 movl %esi,0xf8(%ebp)
	# 0000000c movl 0x08(%ebp),%eax
	# 0000000f movl %edi,0xfc(%ebp)
	# 00000012 movl $0x00000000,0x0c(%edx)
	# 00000019 movl $0x00000000,0x08(%edx)
	# 00000020 movl $0x00000000,0x04(%edx)
	# 00000027 movl $0x00000000,(%edx)
	# 0000002d pushl %ebx
	# 0000002e cpuid
	# 00000030 movl %ebx,%esi
	# 00000032 popl %ebx
	# 00000033 movl %edx,%edi
	# 00000035 movl 0x0c(%ebp),%edx
	# 00000038 movl %eax,(%edx)
	# 0000003a movl %esi,0x04(%edx)
	# 0000003d movl %ecx,0x08(%edx)
	# 00000040 movl %edi,0x0c(%edx)
	# 00000043 movl 0xf8(%ebp),%esi
	# 00000046 movl 0xfc(%ebp),%edi
	# 00000049 leave
	# 0000004a ret
	#
	# $ hexdump -s 0x100 cpuid.o
	# We store this in a variable so the result of our 'assembler' can be checked
	_CPUID_HEXDUMP = '''
	0000100 55 89 e5 83 ec 10 8b 55 0c 89 75 f8 8b 45 08 89
	0000110 7d fc c7 42 0c 00 00 00 00 c7 42 08 00 00 00 00
	0000120 c7 42 04 00 00 00 00 c7 02 00 00 00 00 53 0f a2
	0000130 89 de 5b 89 d7 8b 55 0c 89 02 89 72 04 89 4a 08
	0000140 89 7a 0c 8b 75 f8 8b 7d fc c9 c3
	'''

	CPUID_OPCODES = tuple(itertools.chain(*(
	PUSHL_EBP,
	MOVL_ESP_EBP,
	SUBL_NN_ESP, (0x10, ),
	MOVL_NN_EBP_EDX, (0x0c, ),
	MOVL_ESI_NN_EBP, (0xf8, ),
	MOVL_NN_EBP_EAX, (0x08, ),
	MOVL_EDI_NN_EBP, (0xfc, ),
	MOVL_NN_NN_NN_NN_NN_EDX, (0x0c, ), UINT32_ZERO,
	MOVL_NN_NN_NN_NN_NN_EDX, (0x08, ), UINT32_ZERO,
	MOVL_NN_NN_NN_NN_NN_EDX, (0x04, ), UINT32_ZERO,
	MOVL_NN_NN_NN_NN_EDX, UINT32_ZERO,
	PUSHL_EBX,
	CPUID,
	MOVL_EBX_ESI,
	POPL_EBX,
	MOVL_EDX_EDI,
	MOVL_NN_EBP_EDX, (0x0c, ),
	MOVL_EAX_EDX,
	MOVL_ESI_NN_EDX, (0x04, ),
	MOVL_ECX_NN_EDX, (0x08, ),
	MOVL_EDI_NN_EDX, (0x0c, ),
	MOVL_NN_EBP_ESI, (0xf8, ),
	MOVL_NN_EBP_EDI, (0xfc, ),
	LEAVE,
	RET,
	)))


	# Little helper function
	flatmap = lambda f, i: itertools.chain(*itertools.imap(f, i))
	flatmap.__doc__ = '''
	Lazy flatmap

	:param f: function to map
	:type f: callable
	:param i: iterable to flatmap
	:type i: iterabe
	'''.strip()

	# Check assembly result
	# This helper routine takes hexdump output and parses it into an iterable of
	# instruction bytes
	parse_hexdump = lambda output: tuple(
	# Parse every hex string into an integer
	itertools.imap(lambda i: int(i, 16),
	# Drop first column, take all other columns
	flatmap(lambda s: s.split()[1:],
	# Skip (potential) empty lines
	itertools.ifilter(bool,
	# Strip every line
	itertools.imap(lambda s: s.strip(),
	# Split all lines
	output.splitlines())))))

	# Assert our 'assemby' results in the same instruction stream as the one
	# generated by GCC.
	# I know we could, obviously, just hard-code the GCC-emitted bytes, but that
	# is, IMO, less readable/clean etc.
	assert CPUID_OPCODES == parse_hexdump(_CPUID_HEXDUMP)
	del _CPUID_HEXDUMP

	class _REGS(ctypes.Structure):
	'''Register structure

	Fields are C-style packed by default, which is what we need.
	'''
	_fields_ = [
	('eax', ctypes.c_uint32),
	('ebx', ctypes.c_uint32),
	('ecx', ctypes.c_uint32),
	('edx', ctypes.c_uint32),
	]

	# Being able to print this is useful
	def __iter__(self):
	return iter((self.eax, self.ebx, self.ecx, self.edx, ))

	def __str__(self):
	return str(tuple(map(hex, self)))


	# Function definition. It returns nothing and takes 2 arguments: an uint32
	# containing the CPUID table to retrieve, and a pointer to a REGS structure.
	_cpuid = X86Function(CPUID_OPCODES,
	(None, ctypes.c_uint32, ctypes.POINTER(_REGS)))
	del CPUID_OPCODES

	def cpuid(eax):
	'''Execute a CPUID call

	The passed value will be stored in EAX before executing the CPUID
	instruction. The function returns the values found in EAX, EBX, ECX and
	EDX when the CPUID call returned.

	:param eax: CPUID query identifier
	:type eax: number

	:return: register values
	:rtype: tuple<number>
	'''
	assert eax >= 0

	regs = _REGS()
	_cpuid(eax, ctypes.byref(regs))

	return regs.eax, regs.ebx, regs.ecx, regs.edx


	# Now we got 'cpuid' defined, let's get 'check_cpuid' working, which checks
	# whether the CPU supports the cpuid call
	#
	# Here's some C code to achieve this. We split up the check in 2 parts: one in
	# pure assembler, which is as limited as possible (this will be embedded in
	# _check_cpuid), and one more to handle variable allocation etc, which will be
	# wrapped in check_cpuid.
	#
	# include <stdio.h>
	#
	# typedef unsigned int uint32_t;
	#
	# void _check_cpuid(uint32_t f1, uint32_t f2) {
	# asm volatile("pushfl \n\t"
	# "popl %%eax \n\t"
	# "movl %%eax, %0 \n\t"
	# "xorl $0x200000, %%eax \n\t"
	# "pushl %%eax \n\t"
	# "popfl \n\t"
	# "pushfl \n\t"
	# "popl %%eax \n\t"
	# "movl %%eax, %1 \n\t"
	# "pushl %0 \n\t"
	# "popfl \n\t"
	# : "=r" (*f1),
	# "=r" (*f2)
	# :
	# : "eax"
	# );
	# }
	#
	# int check_cpuid() {
	# uint32_t fl1, fl2;
	#
	# _check_cpuid(&fl1, &fl2);
	#
	# fl1 &= 0x200000;
	# fl2 &= 0x200000;
	#
	# if(fl1 == fl2)
	# return 0;
	# return 1;
	# }
	#
	# int main() {
	# if(!check_cpuid()) {
	# printf("No CPUID support\n");
	# return 1;
	# }
	#
	# printf("CPUID supported\n");
	#
	# return 0;
	# }
	#
	#
	# Compiled on the same system and using the same arguments as above,
	# disassembly of the _check_cpuid function yields
	#
	# __check_cpuid:
	# 00000000 pushl %ebp
	# 00000001 movl %esp,%ebp
	# 00000003 subl $0x08,%esp
	# 00000006 pushfl
	# 00000007 popl %eax
	# 00000008 movl %eax,%edx
	# 0000000a xorl $0x00200000,%eax
	# 0000000f pushl %eax
	# 00000010 popfl
	# 00000011 pushfl
	# 00000012 popl %eax
	# 00000013 movl %eax,%ecx
	# 00000015 pushl %edx
	# 00000016 popfl
	# 00000017 movl 0x08(%ebp),%eax
	# 0000001a movl %edx,(%eax)
	# 0000001c movl 0x0c(%ebp),%eax
	# 0000001f movl %ecx,(%eax)
	# 00000021 leave
	# 00000022 ret
	#
	# Or, in hex format:
	_CHECK_CPUID_HEXDUMP = '''
	0000100 55 89 e5 83 ec 08 9c 58 89 c2 35 00 00 20 00 50
	0000110 9d 9c 58 89 c1 52 9d 8b 45 08 89 10 8b 45 0c 89
	0000120 08 c9 c3
	'''

	PUSHFL = 0x9c,
	POPFL = 0x9d,
	POPL_EAX = 0x58,
	XORL_NN_NN_NN_NN_EAX = 0x35,
	PUSHL_EAX = 0x50,
	MOVL_EAX_ECX = 0x89, 0xc1,
	MOVL_EDX_EAX = 0x89, 0x10,
	MOVL_ECX_EAX = 0x89, 0x08,
	PUSHL_EDX = 0x52,
	MOVL_EDX_NN_EBP = 0x89, 0x55,
	MOVL_ECX_NN_EBP = 0x89, 0x4d,

	# We used MOVL_EAX_EDX before, but that's
	#
	# movl %eax,(%edx)
	#
	# whilst now we need register access
	MOVL_EAX_rEDX = 0x89, 0xc2,

	CHECK_CPUID_OPCODES = tuple(itertools.chain(*(
	PUSHL_EBP,
	MOVL_ESP_EBP,
	SUBL_NN_ESP, (0x08, ),
	PUSHFL,
	POPL_EAX,
	MOVL_EAX_rEDX,
	XORL_NN_NN_NN_NN_EAX, (0x00, 0x00, 0x20, 0x00, ),
	PUSHL_EAX,
	POPFL,
	PUSHFL,
	POPL_EAX,
	MOVL_EAX_ECX,
	PUSHL_EDX,
	POPFL,
	MOVL_NN_EBP_EAX, (0x08, ),
	MOVL_EDX_EAX,
	MOVL_NN_EBP_EAX, (0x0c, ),
	MOVL_ECX_EAX,
	LEAVE,
	RET,
	)))

	# Sanity check
	assert CHECK_CPUID_OPCODES == parse_hexdump(_CHECK_CPUID_HEXDUMP)
	del _CHECK_CPUID_HEXDUMP, parse_hexdump

	_check_cpuid = X86Function(CHECK_CPUID_OPCODES,
	(None, ctypes.POINTER(ctypes.c_uint32),
	ctypes.POINTER(ctypes.c_uint32)))
	del CHECK_CPUID_OPCODES

	def check_cpuid():
	'''Check whether the CPU supports CPUID

	:return: the CPU supports the CPUID instruction
	:rtype: bool
	'''
	fl1, fl2 = ctypes.c_uint32(), ctypes.c_uint32()

	_check_cpuid(ctypes.byref(fl1), ctypes.byref(fl2))

	fl1 = fl1.value & 0x200000
	fl2 = fl2.value & 0x200000

	return (fl1 != fl2)


	uint32_to_string = lambda u: ''.join(chr((u >> (8 * i)) & 0xff)
	for i in xrange(4))
	uint32_to_string.__doc__ = '''
	Convert 4 bytes stored in a 32bit unsigned integer into the string they
	represent

	:param u: int32 to convert
	:type u: number

	:return: string value of the 4 characters stored in the uint32
	:rtype: str
	'''.strip()

	def get_vendor():
	'''Get the vendor name of the CPU'''
	_, ebx, ecx, edx = cpuid(0)

	# Convert to string
	# Data is stored in EBX, EDX, ECX (see Wikipedia), so we can't use natural
	# ordering in the loop
	brand = ''.join(uint32_to_string(reg) for reg in (ebx, edx, ecx, ))

	return brand

	def get_brand():
	'''Get the brand of the CPU'''
	# Values define a null-terminated string, so we only take characters as
	# long they're not null
	return ''.join(itertools.takewhile(lambda c: c != chr(0),
	itertools.imap(uint32_to_string,
	flatmap(cpuid,
	(0x80000002, 0x80000003, 0x80000004, )))))

	def _check_vmx_intel():
	'''Check whether VMX support is available, on Intel CPUs

	:return: VMX supported on the system
	:rtype: bool'''
	# Check whether bit 5 in the ECX register after CPUID function 1 is set
	_, _, ecx, _ = cpuid(1)

	return ((ecx & 0x20) != 0)

	def main():
	assert check_cpuid(), "CPUID not available on this system"

	vendor = get_vendor()

	print 'CPU Information'
	print '==============='
	print 'Vendor:', vendor
	print 'Brand:', get_brand()

	if vendor == 'GenuineIntel':
	print 'VMX supported:', _check_vmx_intel()

	if __name__ == '__main__':
	main()