Skip to content

Instantly share code, notes, and snippets.

#= 4.7 ms =# precompile(Tuple{typeof(JuliaLowering._register_kinds)})
#= 0.7 ms =# precompile(Tuple{typeof(JuliaLowering._match_srcref), Symbol})
#= 27.1 ms =# precompile(Tuple{typeof(JuliaLowering._expand_ast_tree), Symbol, Symbol, Expr})
#= 7.7 ms =# precompile(Tuple{typeof(JuliaLowering._match_kind), JuliaLowering.var"#_expand_ast_tree##0#_expand_ast_tree##1"{Symbol}, Symbol, Expr})
#= 4.4 ms =# precompile(Tuple{JuliaLowering.var"#_expand_ast_tree##4#_expand_ast_tree##5"{Symbol}, Expr})
#= 0.9 ms =# precompile(Tuple{JuliaLowering.var"#_expand_ast_tree##4#_expand_ast_tree##5"{Symbol}, Symbol})
#= 6.7 ms =# precompile(Tuple{typeof(JuliaLowering._match_kind), JuliaLowering.var"#_expand_ast_tree##2#_expand_ast_tree##3"{Symbol, Expr}, Symbol, Expr})
#= 0.8 ms =# precompile(Tuple{JuliaLowering.var"#_expand_ast_tree##2#_expand_ast_tree##3"{Symbol, Expr}, Expr, Symbol, Array{Any, 1}})
#= 1.0 ms =# precompile(Tuple{typeof(JuliaLowering._expand_ast_tree), Symbol, Symbol, Symbol})
#= 1.3
using NonlinearSolve, ModelingToolkit, OrdinaryDiffEqBDF, OrdinaryDiffEqNonlinearSolve
using OrdinaryDiffEqNonlinearSolve: NonlinearSolveAlg
using ModelingToolkit: t_nounits as t, D_nounits as D
using LinearAlgebra
@parameters k₁ k₂ k₃
@variables y₁(t) y₂(t) y₃(t)
eqs = [D(y₁) ~ -k₁ * y₁ + k₃ * y₂ * y₃,
D(y₂) ~ k₁ * y₁ - k₂ * y₂^2 - k₃ * y₂ * y₃,
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index e291e47b59..bd0fd14d70 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -270,7 +270,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
Name,
(GlobalVariable *)nullptr,
GV->getThreadLocalMode(),
- GV->getType()->getAddressSpace());
+ cast<PointerType>(TypeRemapper.remapType(GV->getType()))->getAddressSpace());
@gbaraldi
gbaraldi / f.ll
Created January 16, 2024 20:52
Register Spills
; ModuleID = 'f'
source_filename = "f"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-apple-darwin23.2.0"
@"+Core.Tuple#1439" = private unnamed_addr constant ptr @"+Core.Tuple#1439.jit", !julia.constgv !0
@"+Core.Tuple#1439.jit" = private alias ptr, inttoptr (i64 4529264784 to ptr)
; Function Signature: f(NTuple{50, Int64}, Int64)
This file has been truncated, but you can view the full file.
I[10:48:55.490] clangd version 17.0.3 (https://github.com/llvm/llvm-project 888437e1b60011b8a375dd30928ec925b448da57)
I[10:48:55.490] Features: mac+grpc+xpc
I[10:48:55.490] PID: 2605
I[10:48:55.490] Working directory: /Users/gabrielbaraldi/julia
I[10:48:55.490] argv[0]: /Users/gabrielbaraldi/Library/Application Support/Code - Insiders/User/globalStorage/llvm-vs-code-extensions.vscode-clangd/install/17.0.3/clangd_17.0.3/bin/clangd
I[10:48:55.490] argv[1]: --enable-config
I[10:48:55.491] Starting LSP over stdin/stdout
I[10:48:55.491] <-- initialize(0)
I[10:48:55.499] --> reply:initialize(0) 8 ms
I[10:48:55.500] <-- initialized
@gbaraldi
gbaraldi / llvm
Last active January 3, 2024 20:47
memcmp generated by llvm vs ours
Iterations: 100
Instructions: 1400
Total Cycles: 1403
Total uOps: 1800
Dispatch Width: 6
uOps Per Cycle: 1.28
IPC: 1.00
Block RThroughput: 7.0
@gbaraldi
gbaraldi / gist:8504833053d6c77865379dedc6824130
Last active January 1, 2024 08:00
M3 max 36gb output!
[2, 3, 4, 8, 9, 10, 16, 27, 32, 64, 81, 100, 128, 243, 256, 512, 729, 1000, 1024, 2048, 2187, 4096, 6561, 8192, 10000, 16384, 19683, 32768]
[6.406406406406406e-06, 0.0009810690018531303, 0.02457757296466974, 0.00036613377040552175, 0.05841814247936533, 0.15335071308081583, 0.31059715639810426, 2.2765440666204024, 3.7718561151079135, 1.1752042588960494, 5.866863170444948, 2.9560261549194187, 1.8095471067249096, 5.087776863956844, 11.97214693789007, 12.792844578413735, 47.16240600277262, 45.73406059941392, 65.07148199906084, 80.04057586790874, 205.91364619739895, 174.1874089125134, 354.94201641466566, 599.6347164155466, 748.8818960202927, 619.0967398250291, 774.0532000340861, 632.1859241014777]
2023,Macbook Pro 14'inch, Apple M3 Max, 36 GBs
@gbaraldi
gbaraldi / new.s
Last active December 27, 2023 20:46
Benchmarks
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 14, 0
.globl "_julia_perf_manual_example!_46294" ; -- Begin function julia_perf_manual_example!_46294
.p2align 2
"_julia_perf_manual_example!_46294": ; @"julia_perf_manual_example!_46294"
; Function Signature: perf_manual_example!(Base.SubArray{Float32, 3, Array{Float32, 3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, false}, Base.SubArray{Float32, 3, Array{Float32, 3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, false}, Base.SubArray{Float32, 3, Array{Float32, 3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, false}, Base.IteratorsMD.CartesianIndices{3, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}})
.cfi_startproc
; %bb.0: ; %top
;DEBUG_VALUE: perf_manual_example!:X <- [DW_OP_deref] [$x0+0]
;DEBUG_VALUE: perf_manual_example!:X <- [DW_OP_deref] [$x0+0]
; Function Signature: perf_sumeach_view(Base.BitArray{2})
define { ptr, i8 } @julia_perf_sumeach_view_49258(ptr noalias nocapture noundef nonnull align 8 dereferenceable(8) %union_bytes_return, ptr noundef nonnull align 8 dereferenceable(32) %"A::BitArray") #0 {
top:
%jlcallframe1 = alloca [4 x ptr], align 8
%gcframe2 = alloca [4 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe2, i8 0, i64 32, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 8, ptr %gcframe2, align 16
# This file is machine-generated - editing it directly is not advised
julia_version = "1.10.0-rc1"
manifest_format = "2.0"
project_hash = "acb9f21a4f584e84960b90ee7ee41028ca84e530"
[[deps.Adapt]]
deps = ["LinearAlgebra", "Requires"]
git-tree-sha1 = "02f731463748db57cc2ebfbd9fbc9ce8280d3433"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"