Skip to content

Instantly share code, notes, and snippets.

@mratsim
Created December 13, 2022 17:42
Show Gist options
  • Save mratsim/d52309cb1f49bf329d6b2e0f8d1f7bd6 to your computer and use it in GitHub Desktop.
Save mratsim/d52309cb1f49bf329d6b2e0f8d1f7bd6 to your computer and use it in GitHub Desktop.
LLVM JIT for cryptography
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.passc: gorge("llvm-config --cflags").}
{.passl: gorge("llvm-config --libs").}
# ############################################################
#
# Bindings to LLVM JIT
#
# ############################################################
# https://llvm.org/doxygen/group__LLVMC.html
# Constantine is a library. It is possible that applications relying on Constantine
# also link to libLLVM, for example if they implement a virtual machine (for the EVM, for Snarks/zero-knowledge, ...).
# Hence Constantine should always use LLVM context to "namespace" its own codegen and avoid collisions in the global context.
# ############################################################
#
# LLVM
#
# ############################################################
type
  LlvmBool* = distinct int32
    ## Flag / status code exchanged with the LLVM C API, stored as an int32.

  # Opaque LLVM handles. Each one is a distinct pointer mapped onto the
  # C typedef named in its `importc` pragma.
  ContextRef* {.header: "<llvm-c/Core.h>", importc: "LLVMContextRef".} = distinct pointer
  ModuleRef* {.header: "<llvm-c/Core.h>", importc: "LLVMModuleRef".} = distinct pointer
  ExecutionEngineRef* {.header: "<llvm-c/ExecutionEngine.h>", importc: "LLVMExecutionEngineRef".} = distinct pointer
  TypeRef* {.header: "<llvm-c/Core.h>", importc: "LLVMTypeRef".} = distinct pointer
  ValueRef* {.header: "<llvm-c/Core.h>", importc: "LLVMValueRef".} = distinct pointer
{.push header: "<llvm-c/Core.h>".}

proc createContext*(): ContextRef {.importc: "LLVMContextCreate".}
  ## Binds `LLVMContextCreate`; returns a context handle.

proc dispose*(ctx: ContextRef) {.importc: "LLVMContextDispose".}
  ## Binds `LLVMContextDispose`.

proc llvmCreateModule(
       name: cstring,
       ctx: ContextRef
     ): ModuleRef {.importc: "LLVMModuleCreateWithNameInContext".}
  ## Private raw binding; the argument order follows the C function
  ## (name first, context second). Use `createModule` instead.

template createModule*(ctx: ContextRef, name: cstring): ModuleRef =
  ## Context-first wrapper over the raw binding so that the call reads
  ## `ctx.createModule(name)`.
  llvmCreateModule(name, ctx)

proc dispose*(m: ModuleRef) {.importc: "LLVMDisposeModule".}
  ## Destroys a module.
  ## Note: destroying an Execution Engine also destroys the modules
  ## attached to it.

proc dispose*(msg: cstring) {.importc: "LLVMDisposeMessage".}
  ## Strings handed out by LLVM are owned by LLVM and must be released
  ## through this dedicated function.

{.pop.} # {.push header: "<llvm-c/Core.h>".}
{.push header: "<llvm-c/Analysis.h>".}

# Selects how the verifier reacts when it finds a problem.
type VerifierFailureAction* = enum
  AbortProcessAction = 0 ## verifier will print to stderr and abort()
  PrintMessageAction = 1 ## verifier will print to stderr and return 1
  ReturnStatusAction = 2 ## verifier will just return 1

proc verify*(
       module: ModuleRef,
       failureAction: VerifierFailureAction,
       msg: var cstring
     ): LlvmBool {.importc: "LLVMVerifyModule".}
  ## Binds `LLVMVerifyModule`. `msg` is an output parameter that receives
  ## a string from LLVM; the sanity check in this file releases it with
  ## `dispose`.

proc verify*(
       fn: ValueRef,
       failureAction: VerifierFailureAction
     ): LlvmBool {.importc: "LLVMVerifyFunction".}
  ## Binds `LLVMVerifyFunction` for a single function handle.

{.pop.}
# ############################################################
#
# Target
#
# ############################################################
{.push header: "<llvm-c/Target.h>".}

proc initializeNativeTarget*(): LlvmBool {.
  discardable, importc: "LLVMInitializeNativeTarget".}
  ## Binds `LLVMInitializeNativeTarget`; the status code may be ignored.

proc initializeNativeAsmPrinter*(): LlvmBool {.
  discardable, importc: "LLVMInitializeNativeAsmPrinter".}
  ## Binds `LLVMInitializeNativeAsmPrinter`; the status code may be ignored.

{.pop.}
# ############################################################
#
# Execution Engine
#
# ############################################################
{.push header: "<llvm-c/ExecutionEngine.h>".}

proc linkInMCJIT*() {.importc: "LLVMLinkInMCJIT".}
  ## Binds `LLVMLinkInMCJIT`.

proc createJITCompilerForModule*(
       engine: var ExecutionEngineRef,
       module: ModuleRef,
       optLevel: uint32,
       err: var cstring
     ): LlvmBool {.importc: "LLVMCreateJITCompilerForModule".}
  ## Binds `LLVMCreateJITCompilerForModule`.
  ## `engine` and `err` are output parameters. The sanity check in this
  ## file treats a non-zero result as failure and prints `err`.
proc remove*(
       engine: ExecutionEngineRef,
       m: ModuleRef,
       outMod: var ModuleRef,
       err: var cstring
     ): LlvmBool {.importc: "LLVMRemoveModule".}
  ## Binds `LLVMRemoveModule`: detaches module `m` from `engine`.
  ## `outMod` and `err` are output parameters.
  ##
  ## Exported like the other bindings so that importing modules can detach
  ## a module before disposing of the engine.
proc dispose*(engine: ExecutionEngineRef) {.
  importc: "LLVMDisposeExecutionEngine".}
  ## Destroys an execution engine.
  ## Note: the modules attached to the engine are destroyed with it.

proc getFunctionAddress*(
       engine: ExecutionEngineRef,
       name: cstring
     ): distinct pointer {.importc: "LLVMGetFunctionAddress".}
  ## Binds `LLVMGetFunctionAddress`. The sanity check in this file `cast`s
  ## the result to a proc type before calling it.

{.pop.}
# ############################################################
#
# Types
#
# ############################################################
# https://llvm.org/doxygen/group__LLVMCCoreType.html
{.push header: "<llvm-c/Core.h>".}

# Integers
# ------------------------------------------------------------
# One constructor per fixed width, each bound to the matching
# `LLVMInt<N>TypeInContext`, plus `int_t` for an arbitrary bit width.

proc int1_t*(ctx: ContextRef): TypeRef {.
  importc: "LLVMInt1TypeInContext".}
proc int8_t*(ctx: ContextRef): TypeRef {.
  importc: "LLVMInt8TypeInContext".}
proc int16_t*(ctx: ContextRef): TypeRef {.
  importc: "LLVMInt16TypeInContext".}
proc int32_t*(ctx: ContextRef): TypeRef {.
  importc: "LLVMInt32TypeInContext".}
proc int64_t*(ctx: ContextRef): TypeRef {.
  importc: "LLVMInt64TypeInContext".}
proc int128_t*(ctx: ContextRef): TypeRef {.
  importc: "LLVMInt128TypeInContext".}

proc int_t*(
       ctx: ContextRef,
       numBits: uint32
     ): TypeRef {.importc: "LLVMIntTypeInContext".}
  ## Integer type of `numBits` bits.
# Composite
# ------------------------------------------------------------

proc struct_t*(
       ctx: ContextRef,
       # openArray is passed as (pointer, length); the length is implicitly
       # converted to uint32, which requires the header.
       elemTypes: openArray[TypeRef],
       packed: LlvmBool
     ): TypeRef {.importc: "LLVMStructTypeInContext".}
  ## Binds `LLVMStructTypeInContext`.

proc array_t*(
       elemType: TypeRef,
       elemCount: uint32
     ): TypeRef {.importc: "LLVMArrayType".}
  ## Binds `LLVMArrayType`.
# Functions
# ------------------------------------------------------------

proc function_t*(
       returnType: TypeRef,
       # openArray is passed as (pointer, length); the length is implicitly
       # converted to uint32, which requires the header.
       paramTypes: openArray[TypeRef],
       isVarArg: LlvmBool
     ): TypeRef {.importc: "LLVMFunctionType".}
  ## Binds `LLVMFunctionType`.

proc addFunction*(
       m: ModuleRef,
       name: cstring,
       ty: TypeRef
     ): ValueRef {.importc: "LLVMAddFunction".}
  ## Declares a function called `name` in module `m`.
  ## The returned handle is used to specify its instructions.

{.pop.} # {.push header: "<llvm-c/Core.h>".}
# ############################################################
#
# Values
#
# ############################################################
{.push header: "<llvm-c/Core.h>".}

# Constants
# ------------------------------------------------------------
# https://llvm.org/doxygen/group__LLVMCCoreValueConstant.html

proc constNull*(ty: TypeRef): ValueRef {.
  importc: "LLVMConstNull".}
  ## Binds `LLVMConstNull`.

proc constAllOnes*(ty: TypeRef): ValueRef {.
  importc: "LLVMConstAllOnes".}
  ## Binds `LLVMConstAllOnes`.

proc constStruct*(
       # openArray is passed as (pointer, length); the length is implicitly
       # converted to uint32, which requires the header.
       vals: openArray[ValueRef],
       packed: LlvmBool
     ): ValueRef {.importc: "LLVMConstStruct".}
  ## Binds `LLVMConstStruct`.

proc constArray*(
       ty: TypeRef,
       # openArray is passed as (pointer, length); the length is implicitly
       # converted to uint32, which requires the header.
       constantVals: openArray[ValueRef]
     ): ValueRef {.importc: "LLVMConstArray".}
  ## Binds `LLVMConstArray`.

{.pop.} # {.push header: "<llvm-c/Core.h>".}
# ############################################################
#
# IR builder
#
# ############################################################
# https://llvm.org/doxygen/group__LLVMCCoreInstructionBuilder.html
type
  BasicBlockRef* {.header: "<llvm-c/Core.h>", importc: "LLVMBasicBlockRef".} = distinct pointer

  BuilderRef* {.header: "<llvm-c/Core.h>", importc: "LLVMBuilderRef".} = distinct pointer
    ## An instruction builder represents a point within a basic block and
    ## is the exclusive means of building instructions using the C interface.

  IntPredicate* {.size: sizeof(cint).} = enum
    ## Comparison kinds accepted by `icmp`. Numbering starts at 32 and the
    ## enum is sized like a C `int`.
    IntEQ = 32   ## equal
    IntNE = 33   ## not equal
    IntUGT = 34  ## unsigned greater than
    IntUGE = 35  ## unsigned greater or equal
    IntULT = 36  ## unsigned less than
    IntULE = 37  ## unsigned less or equal
    IntSGT = 38  ## signed greater than
    IntSGE = 39  ## signed greater or equal
    IntSLT = 40  ## signed less than
    IntSLE = 41  ## signed less or equal
{.push header: "<llvm-c/Core.h>".}

# Instantiation
# ------------------------------------------------------------

proc appendBasicBlock*(
       ctx: ContextRef,
       fn: ValueRef,
       name: cstring
     ): BasicBlockRef {.importc: "LLVMAppendBasicBlockInContext".}
  ## Appends a basic block to the end of function `fn`.

proc createBuilder*(ctx: ContextRef): BuilderRef {.
  importc: "LLVMCreateBuilderInContext".}
  ## Binds `LLVMCreateBuilderInContext`.

proc dispose*(builder: BuilderRef) {.
  importc: "LLVMDisposeBuilder".}
  ## Binds `LLVMDisposeBuilder`.
# Functions
# ------------------------------------------------------------

proc getParam*(
       fn: ValueRef,
       index: uint32
     ): ValueRef {.importc: "LLVMGetParam".}
  ## Handle of parameter number `index` of function `fn`.

proc retVoid*(builder: BuilderRef): ValueRef {.
  importc: "LLVMBuildRetVoid".}
  ## Binds `LLVMBuildRetVoid`.

proc ret*(
       builder: BuilderRef,
       returnVal: ValueRef
     ) {.importc: "LLVMBuildRet".}
  ## Binds `LLVMBuildRet`; this binding does not expose a result.
# Positioning
# ------------------------------------------------------------

proc position*(
       builder: BuilderRef,
       blck: BasicBlockRef,
       instr: ValueRef
     ) {.importc: "LLVMPositionBuilder".}
  ## Binds `LLVMPositionBuilder`.

proc positionBefore*(
       builder: BuilderRef,
       instr: ValueRef
     ) {.importc: "LLVMPositionBuilderBefore".}
  ## Binds `LLVMPositionBuilderBefore`.

proc positionAtEnd*(
       builder: BuilderRef,
       blck: BasicBlockRef
     ) {.importc: "LLVMPositionBuilderAtEnd".}
  ## Binds `LLVMPositionBuilderAtEnd`.
# Intermediate Representation
# ------------------------------------------------------------
#
# Suffixes used by the arithmetic builders:
# - NSW: no signed wrap, signed value cannot over- or underflow.
# - NUW: no unsigned wrap, unsigned value cannot over- or underflow.

# Addition
proc add*(builder: BuilderRef,
          lhs: ValueRef, rhs: ValueRef,
          name: cstring): ValueRef {.importc: "LLVMBuildAdd".}
proc addNSW*(builder: BuilderRef,
             lhs: ValueRef, rhs: ValueRef,
             name: cstring): ValueRef {.importc: "LLVMBuildNSWAdd".}
proc addNUW*(builder: BuilderRef,
             lhs: ValueRef, rhs: ValueRef,
             name: cstring): ValueRef {.importc: "LLVMBuildNUWAdd".}

# Subtraction
proc sub*(builder: BuilderRef,
          lhs: ValueRef, rhs: ValueRef,
          name: cstring): ValueRef {.importc: "LLVMBuildSub".}
proc subNSW*(builder: BuilderRef,
             lhs: ValueRef, rhs: ValueRef,
             name: cstring): ValueRef {.importc: "LLVMBuildNSWSub".}
proc subNUW*(builder: BuilderRef,
             lhs: ValueRef, rhs: ValueRef,
             name: cstring): ValueRef {.importc: "LLVMBuildNUWSub".}
# Negation is unary: the C builders take (builder, value, name).
# A two-operand declaration cannot be called, because the generated C call
# would not match the prototype in <llvm-c/Core.h>.
proc neg*(
       builder: BuilderRef,
       val: ValueRef,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildNeg".}
  ## Binds `LLVMBuildNeg`: negation of `val`.

proc negNSW*(
       builder: BuilderRef,
       val: ValueRef,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildNSWNeg".}
  ## Binds `LLVMBuildNSWNeg`: negation flagged "no signed wrap".

proc negNUW*(
       builder: BuilderRef,
       val: ValueRef,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildNUWNeg".}
  ## Binds `LLVMBuildNUWNeg`: negation flagged "no unsigned wrap".
  ## NOTE(review): recent LLVM releases appear to deprecate this entry
  ## point - confirm against the targeted LLVM version.
# Multiplication
proc mul*(builder: BuilderRef,
          lhs: ValueRef, rhs: ValueRef,
          name: cstring): ValueRef {.importc: "LLVMBuildMul".}
proc mulNSW*(builder: BuilderRef,
             lhs: ValueRef, rhs: ValueRef,
             name: cstring): ValueRef {.importc: "LLVMBuildNSWMul".}
proc mulNUW*(builder: BuilderRef,
             lhs: ValueRef, rhs: ValueRef,
             name: cstring): ValueRef {.importc: "LLVMBuildNUWMul".}

# Division and remainder (U: unsigned, S: signed)
proc divU*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildUDiv".}
proc divU_exact*(builder: BuilderRef,
                 lhs: ValueRef, rhs: ValueRef,
                 name: cstring): ValueRef {.importc: "LLVMBuildExactUDiv".}
proc divS*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildSDiv".}
proc divS_exact*(builder: BuilderRef,
                 lhs: ValueRef, rhs: ValueRef,
                 name: cstring): ValueRef {.importc: "LLVMBuildExactSDiv".}
proc remU*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildURem".}
proc remS*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildSRem".}

# Shifts
proc lshl*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildShl".}
proc lshr*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildLShr".}
proc ashr*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildAShr".}

# Bitwise operations
proc `and`*(builder: BuilderRef,
            lhs: ValueRef, rhs: ValueRef,
            name: cstring): ValueRef {.importc: "LLVMBuildAnd".}
proc `or`*(builder: BuilderRef,
           lhs: ValueRef, rhs: ValueRef,
           name: cstring): ValueRef {.importc: "LLVMBuildOr".}
proc `xor`*(builder: BuilderRef,
            lhs: ValueRef, rhs: ValueRef,
            name: cstring): ValueRef {.importc: "LLVMBuildXor".}
proc `not`*(builder: BuilderRef,
            val: ValueRef,
            name: cstring): ValueRef {.importc: "LLVMBuildNot".}
proc select*(
       builder: BuilderRef,
       condition, then, otherwise: ValueRef,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildSelect".}
  ## Binds `LLVMBuildSelect`: builds a `select` instruction choosing between
  ## `then` and `otherwise` according to `condition`.
  ## The C symbol must be `LLVMBuildSelect`; `LLVMBuildNot` is the unary
  ## bitwise-not builder and takes a different argument list.
proc icmp*(
       builder: BuilderRef,
       op: IntPredicate,
       lhs: ValueRef, rhs: ValueRef,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildICmp".}
  ## Binds `LLVMBuildICmp`: integer comparison of `lhs` and `rhs` using
  ## predicate `op`.
# Casts
# Each builder returns the handle of the converted value. The procs are
# `discardable` only so that existing statement-style calls keep compiling;
# new code should use the result.
proc bitcast*(
       builder: BuilderRef,
       val: ValueRef,
       destTy: TypeRef,
       name: cstring
     ): ValueRef {.discardable, importc: "LLVMBuildBitCast".}
  ## Binds `LLVMBuildBitCast` (note the capital C in the C symbol).

proc trunc*(
       builder: BuilderRef,
       val: ValueRef,
       destTy: TypeRef,
       name: cstring
     ): ValueRef {.discardable, importc: "LLVMBuildTrunc".}
  ## Truncate

proc zext*(
       builder: BuilderRef,
       val: ValueRef,
       destTy: TypeRef,
       name: cstring
     ): ValueRef {.discardable, importc: "LLVMBuildZExt".}
  ## Zero-extend

proc sext*(
       builder: BuilderRef,
       val: ValueRef,
       destTy: TypeRef,
       name: cstring
     ): ValueRef {.discardable, importc: "LLVMBuildSExt".}
  ## Sign-extend
# Memory allocation
# The C builders for allocations take a trailing `Name` argument; it
# defaults to "" here so that the short call form stays valid.
proc malloc*(
       builder: BuilderRef,
       ty: TypeRef,
       name: cstring = ""
     ): ValueRef {.importc: "LLVMBuildMalloc".}
  ## Binds `LLVMBuildMalloc`: allocation of one `ty`.

proc mallocArray*(
       builder: BuilderRef,
       ty: TypeRef,
       val: ValueRef,
       name: cstring = ""
     ): ValueRef {.importc: "LLVMBuildArrayMalloc".}
  ## Binds `LLVMBuildArrayMalloc`: allocation of `val` elements of `ty`.

proc free*(
       builder: BuilderRef,
       `ptr`: ValueRef
     ): ValueRef {.importc: "LLVMBuildFree".}
  ## Binds `LLVMBuildFree`, which takes only the builder and the pointer
  ## value to release.

proc alloca*(
       builder: BuilderRef,
       ty: TypeRef,
       name: cstring = ""
     ): ValueRef {.importc: "LLVMBuildAlloca".}
  ## Binds `LLVMBuildAlloca`: allocation of one `ty`.

proc allocaArray*(
       builder: BuilderRef,
       ty: TypeRef,
       val: ValueRef,
       name: cstring = ""
     ): ValueRef {.importc: "LLVMBuildArrayAlloca".}
  ## Binds `LLVMBuildArrayAlloca`: allocation of `val` elements of `ty`.
# GetElementPtr family, see https://www.llvm.org/docs/GetElementPtr.html

proc getElementPtr2*(
       builder: BuilderRef,
       ty: TypeRef,
       `ptr`: ValueRef,
       # openArray is passed as (pointer, length); the length is implicitly
       # converted to uint32, which requires the header.
       indices: openArray[ValueRef],
       name: cstring
     ): ValueRef {.importc: "LLVMBuildGEP2".}
  ## Binds `LLVMBuildGEP2`.
  ## https://www.llvm.org/docs/GetElementPtr.html

proc getElementPtr2_InBounds*(
       builder: BuilderRef,
       ty: TypeRef,
       `ptr`: ValueRef,
       # openArray is passed as (pointer, length); the length is implicitly
       # converted to uint32, which requires the header.
       indices: openArray[ValueRef],
       name: cstring
     ): ValueRef {.importc: "LLVMBuildInBoundsGEP2".}
  ## Binds `LLVMBuildInBoundsGEP2`.
  ## https://www.llvm.org/docs/GetElementPtr.html
  ## If the GEP lacks the inbounds keyword, the value is the result of
  ## evaluating the implied two's complement integer computation.
  ## However, since there is no guarantee of where an object will be
  ## allocated in the address space, such values have limited meaning.

proc getElementPtr2_Struct*(
       builder: BuilderRef,
       ty: TypeRef,
       `ptr`: ValueRef,
       idx: uint32,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildStructGEP2".}
  ## Binds `LLVMBuildStructGEP2`; `idx` is a single index.
  ## https://www.llvm.org/docs/GetElementPtr.html
  ## If the GEP lacks the inbounds keyword, the value is the result of
  ## evaluating the implied two's complement integer computation.
  ## However, since there is no guarantee of where an object will be
  ## allocated in the address space, such values have limited meaning.
proc load2*(
       builder: BuilderRef,
       ty: TypeRef,
       `ptr`: ValueRef,
       name: cstring
     ): ValueRef {.importc: "LLVMBuildLoad2".}
  ## Binds `LLVMBuildLoad2`: load of a `ty` through `ptr`.

proc store*(
       builder: BuilderRef,
       val: ValueRef,
       `ptr`: ValueRef
     ): ValueRef {.importc: "LLVMBuildStore".}
  ## Binds `LLVMBuildStore`: store of `val` through `ptr`.
# Memory intrinsics
# The C symbols are spelled with an inner capital: MemSet, MemCpy, MemMove.
proc memset*(
       builder: BuilderRef,
       `ptr`, val, len: ValueRef,
       align: uint32
     ) {.importc: "LLVMBuildMemSet".}
  ## Binds `LLVMBuildMemSet`; this binding does not expose a result.

proc memcpy*(
       builder: BuilderRef,
       dst: ValueRef,
       dstAlign: uint32,
       src: ValueRef,
       srcAlign: uint32,
       size: ValueRef
     ) {.importc: "LLVMBuildMemCpy".}
  ## Binds `LLVMBuildMemCpy`; this binding does not expose a result.

proc memmove*(
       builder: BuilderRef,
       dst: ValueRef,
       dstAlign: uint32,
       src: ValueRef,
       srcAlign: uint32,
       size: ValueRef
     ) {.importc: "LLVMBuildMemMove".}
  ## Binds `LLVMBuildMemMove`; this binding does not expose a result.

{.pop.} # {.push header: "<llvm-c/Core.h>".}
# ############################################################
#
# Sanity Check
#
# ############################################################
when isMainModule:
  # Smoke test: build `add(a, b: int32): int32` as LLVM IR, JIT-compile it,
  # call it, then tear everything down.
  echo "LLVM JIT compiler sanity check"

  let ctx = createContext()
  var module = ctx.createModule("addition")
  let i32 = ctx.int32_t()

  let addType = function_t(i32, [i32, i32], isVarArg = LlvmBool(false))
  let addBody = module.addFunction("add", addType)

  let builder = ctx.createBuilder()
  let blck = ctx.appendBasicBlock(addBody, "addBody")
  builder.positionAtEnd(blck)

  block:
    # Function body: return a + b
    let a = addBody.getParam(0)
    let b = addBody.getParam(1)
    let sum = builder.add(a, b, "sum")
    builder.ret(sum)

  block:
    var errMsg: cstring
    let errCode = module.verify(AbortProcessAction, errMsg)
    echo "Verification: code ", int(errCode), ", message \"", errMsg, "\""
    errMsg.dispose()

  var engine: ExecutionEngineRef
  block:
    let errCode = initializeNativeTarget()
    echo "Target init: code ", int(errCode)
    # Without the ASM printer, JIT creation fails with
    # "LLVM ERROR: Target does not support MC emission!"
    let errCodeASMPrinter = initializeNativeAsmPrinter()
    echo "ASM printer init: code ", int(errCodeASMPrinter)

    var errMsg: cstring
    if bool createJITCompilerForModule(engine, module, optLevel = 0, errMsg):
      if errMsg.len > 0:
        echo errMsg
        echo "exiting ..."
        # The message is owned by LLVM: release it before leaving.
        errMsg.dispose()
      else:
        echo "JIT compiler: error without details ... exiting"
      quit 1

  let jitAdd = cast[proc(a, b: int32): int32 {.noconv.}](
    engine.getFunctionAddress("add"))

  echo "jitAdd(1, 2) = ", jitAdd(1, 2)
  doAssert jitAdd(1, 2) == 1 + 2

  block:
    # Cleanup
    # Note: when disposing the Execution Engine, attached modules are also
    # disposed. Here we go the extra mile of detaching the module for testing.
    builder.dispose()
    var errMsg: cstring
    let errCode = engine.remove(module, module, errMsg)
    echo "Detaching module from Execution Engine: code ", int(errCode), ", message \"", errMsg, "\""
    errMsg.dispose()
    if int(errCode) == 0:
      # Detached: the module is no longer owned by the engine, free it here.
      module.dispose()
    # Otherwise the module is still attached and engine.dispose() frees it;
    # disposing it here as well would free it twice.
    engine.dispose()
    ctx.dispose()

  echo "LLVM JIT - SUCCESS"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment