Skip to content

Instantly share code, notes, and snippets.

@andrewrk
Created December 19, 2023 00:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewrk/bd27dc0a66694c13b435b88c8114fdb9 to your computer and use it in GitHub Desktop.
Save andrewrk/bd27dc0a66694c13b435b88c8114fdb9 to your computer and use it in GitHub Desktop.
Demo of specifying different target CPU features per module. Context: https://github.com/ziglang/zig/pull/18160
/// Returns the sum of `a` and `b`.
///
/// Uses the plain `+` operator, so the addition is overflow-checked in
/// safety-enabled builds rather than wrapping.
pub fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}
const bar = @import("bar");
/// Exported entry point: adds two fixed `i32` values through `bar.add`
/// and returns the result.
export fn entry() i32 {
    const lhs: i32 = 1234;
    const rhs: i32 = 5678;
    const total = bar.add(lhs, rhs);
    return total;
}
/// Minimal panic handler: discards all three arguments and traps immediately.
/// Never returns.
pub fn panic(msg: []const u8, st: ?*std.builtin.StackTrace, start: ?usize) noreturn {
    // The arguments are intentionally unused; discard each one explicitly.
    _ = msg;
    _ = st;
    _ = start;
    @trap();
}
const std = @import("std");
$ stage4/bin/zig build-obj -fno-formatted-panics -target x86_64-linux --dep bar -fstrip --mod root foo.zig -target native --mod bar bar.zig --verbose-llvm-ir
; ModuleID = 'root'
source_filename = "root"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-musl"
%Target.Cpu.Feature.Set = type { [5 x i64] }
%Target.Cpu.Model = type { { ptr, i64 }, { ptr, i64 }, %Target.Cpu.Feature.Set }
%Target.Cpu = type { ptr, %Target.Cpu.Feature.Set, i6, [7 x i8] }
@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@Target.Cpu.Feature.Set.empty = internal unnamed_addr constant %Target.Cpu.Feature.Set zeroinitializer, align 8
@Target.x86.cpu.x86_64 = internal unnamed_addr constant %Target.Cpu.Model { { ptr, i64 } { ptr @__anon_447, i64 6 }, { ptr, i64 } { ptr @__anon_450, i64 6 }, %Target.Cpu.Feature.Set { [5 x i64] [i64 149533581377552, i64 551367491584, i64 1168232153472, i64 0, i64 0] } }, align 8
@__anon_447 = internal unnamed_addr global [7 x i8] c"x86_64\00", align 1
@__anon_450 = internal unnamed_addr global [7 x i8] c"x86-64\00", align 1
@builtin.cpu = internal unnamed_addr constant %Target.Cpu { ptr @Target.x86.cpu.x86_64, %Target.Cpu.Feature.Set { [5 x i64] [i64 149533581377552, i64 551367491584, i64 1168232677760, i64 0, i64 0] }, i6 -27, [7 x i8] undef }, align 8
@start.simplified_logic = internal unnamed_addr constant i1 false, align 1
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1
@__anon_1045 = internal unnamed_addr global [17 x i8] c"integer overflow\00", align 1
@0 = private unnamed_addr constant { i64, i8, [7 x i8] } { i64 undef, i8 0, [7 x i8] undef }, align 8
@builtin.panic_messages.integer_overflow = internal unnamed_addr constant ptr @__anon_1045, align 8
; Function Attrs: nounwind uwtable
define dso_local i32 @entry() #0 {
%1 = call fastcc i32 @bar.add(i32 1234, i32 5678)
ret i32 %1
}
; Function Attrs: nounwind uwtable
define internal fastcc i32 @bar.add(i32 %0, i32 %1) unnamed_addr #1 {
%3 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 %1)
%4 = extractvalue { i32, i1 } %3, 1
br i1 %4, label %5, label %6
5: ; preds = %2
call fastcc void @foo.panic(ptr @__anon_1045, i64 16, ptr null, ptr @0)
unreachable
6: ; preds = %2
%7 = extractvalue { i32, i1 } %3, 0
ret i32 %7
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 %1) #2
; Function Attrs: noreturn nounwind uwtable
define internal fastcc void @foo.panic(ptr nonnull readonly align 1 %0, i64 %1, ptr align 8 %2, ptr nonnull readonly align 8 %3) unnamed_addr #3 {
%5 = insertvalue { ptr, i64 } poison, ptr %0, 0
%6 = insertvalue { ptr, i64 } %5, i64 %1, 1
call void @llvm.trap()
unreachable
}
; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #4
attributes #0 = { nounwind uwtable "frame-pointer"="all" "probe-stack"="__zig_probe_stack" "target-cpu"="x86-64" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,-adx,-aes,-allow-light-256-bit,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-bmi,-bmi2,-branchfusion,-cldemote,-clflushopt,-clwb,-clzero,+cmov,-cmpccxadd,-crc32,-cx16,+cx8,-enqcmd,-ermsb,-f16c,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,-fast-15bytenop,-fast-7bytenop,-fast-bextr,-fast-gather,-fast-hops,-fast-lzcnt,-fast-movbe,-fast-scalar-fsqrt,-fast-scalar-shift-masks,-fast-shld-rotate,-fast-variable-crosslane-shuffle,-fast-variable-perlane-shuffle,-fast-vector-fsqrt,-fast-vector-shift-masks,-faster-shift-than-shuffle,-fma,-fma4,-fsgsbase,-fsrm,+fxsr,-gfni,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,+idivq-to-divl,-invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,-lzcnt,+macrofusion,+mmx,-movbe,-movdir64b,-movdiri,-mwaitx,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,+nopl,-pad-short-functions,-pclmul,-pconfig,-pku,-popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefetchi,-prefetchwt1,-prfchw,-ptwrite,-raoint,-rdpid,-rdpru,-rdrnd,-rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,-sahf,-sbb-dep-breaking,-serialize,-seses,-sgx,-sha,-sha512,-shstk,+slow-3ops-lea,+slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,+sse,+sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-sse-unaligned-mem,-ssse3,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-vaes,-vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,-xsave,-xsavec,-xsaveopt,-xsaves" }
attributes #1 = { nounwind uwtable "frame-pointer"="all" "probe-stack"="__zig_probe_stack" "target-cpu"="skylake" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,+adx,+aes,+allow-light-256-bit,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,+avx,+avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,+bmi,+bmi2,-branchfusion,-cldemote,+clflushopt,-clwb,-clzero,+cmov,-cmpccxadd,+crc32,+cx16,+cx8,-enqcmd,+ermsb,+f16c,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,+false-deps-popcnt,-false-deps-range,-fast-11bytenop,+fast-15bytenop,-fast-7bytenop,-fast-bextr,+fast-gather,-fast-hops,-fast-lzcnt,-fast-movbe,+fast-scalar-fsqrt,-fast-scalar-shift-masks,+fast-shld-rotate,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,-faster-shift-than-shuffle,+fma,-fma4,+fsgsbase,-fsrm,+fxsr,-gfni,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,+idivq-to-divl,+invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,+lzcnt,+macrofusion,+mmx,+movbe,-movdir64b,-movdiri,-mwaitx,-no-bypass-delay,+no-bypass-delay-blend,+no-bypass-delay-mov,+no-bypass-delay-shuffle,+nopl,-pad-short-functions,+pclmul,-pconfig,-pku,+popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefetchi,-prefetchwt1,+prfchw,-ptwrite,-raoint,-rdpid,-rdpru,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,+sahf,-sbb-dep-breaking,-serialize,-seses,+sgx,-sha,-sha512,-shstk,+slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,+sse,+sse2,+sse3,+sse4.1,+sse4.2,-sse4a,-sse-unaligned-mem,+ssse3,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-vaes,-vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #3 = { noreturn nounwind uwtable "frame-pointer"="all" "probe-stack"="__zig_probe_stack" "target-cpu"="x86-64" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,-adx,-aes,-allow-light-256-bit,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-bmi,-bmi2,-branchfusion,-cldemote,-clflushopt,-clwb,-clzero,+cmov,-cmpccxadd,-crc32,-cx16,+cx8,-enqcmd,-ermsb,-f16c,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,-fast-15bytenop,-fast-7bytenop,-fast-bextr,-fast-gather,-fast-hops,-fast-lzcnt,-fast-movbe,-fast-scalar-fsqrt,-fast-scalar-shift-masks,-fast-shld-rotate,-fast-variable-crosslane-shuffle,-fast-variable-perlane-shuffle,-fast-vector-fsqrt,-fast-vector-shift-masks,-faster-shift-than-shuffle,-fma,-fma4,-fsgsbase,-fsrm,+fxsr,-gfni,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,+idivq-to-divl,-invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,-lzcnt,+macrofusion,+mmx,-movbe,-movdir64b,-movdiri,-mwaitx,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,+nopl,-pad-short-functions,-pclmul,-pconfig,-pku,-popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefetchi,-prefetchwt1,-prfchw,-ptwrite,-raoint,-rdpid,-rdpru,-rdrnd,-rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,-sahf,-sbb-dep-breaking,-serialize,-seses,-sgx,-sha,-sha512,-shstk,+slow-3ops-lea,+slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,+sse,+sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-sse-unaligned-mem,-ssse3,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-vaes,-vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,-xsave,-xsavec,-xsaveopt,-xsaves" }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment