Skip to content

Instantly share code, notes, and snippets.

@ranocha
Last active November 25, 2021 18:19
Show Gist options
  • Save ranocha/f0daf3ea1a7184389119dcde96723741 to your computer and use it in GitHub Desktop.
Save ranocha/f0daf3ea1a7184389119dcde96723741 to your computer and use it in GitHub Desktop.
Construct roofline model for Trixi.jl volume terms
## activate project environment
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()
## load packages
using LIKWID
using PyCall
import PyPlot; plt = PyPlot
cycler = pyimport("cycler").cycler

# Build both property cyclers from one shared list of seven colors:
# `line_cycler` pairs each color with a line style, `marker_cycler` pairs
# each color with a marker and disables the connecting line.
line_cycler, marker_cycler = let colors = ["#E69F00", "#56B4E9", "#009E73", "#0072B2",
                                           "#D55E00", "#CC79A7", "#F0E442"]
    by_color = cycler(color=colors)
    with_lines = by_color + cycler(linestyle=["-", "--", "-.", ":", "-", "--", "-."])
    with_markers = by_color +
                   cycler(linestyle=fill("none", length(colors))) +
                   cycler(marker=["4", "2", "3", "1", "+", "x", "."])
    (with_lines, with_markers)
end

# Global matplotlib settings, applied in order as `plt.rc(group; options...)`.
for (group, options) in (
    "axes" => (prop_cycle=line_cycler,),
    # "text" => (usetex=true,),
    # "text.latex" => (preamble="\\usepackage{newpxtext}\\usepackage{newpxmath}\\usepackage{commath}\\usepackage{mathtools}",),
    "font" => (family="serif", size=18.),
    "savefig" => (dpi=100,),
    "legend" => (loc="best", fontsize="medium", fancybox=true, framealpha=0.5),
    "lines" => (linewidth=2.5, markersize=10, markeredgewidth=2.5),
)
    plt.rc(group; options...)
end
## gather data for the empirical roofline model

# LIKWID reports "MFlops/s", "MByte/s", and "DP [MFLOP/s]" using decimal
# prefixes, so going from Mega to Giga is a division by 1000 (not 1024).
mega_to_giga(value) = value / 1000

"""
    select_simd_kernel(features, avx512_kernel, avx2_kernel, fallback_kernel)

Return `avx512_kernel` if `features` contains `"AVX512"`, otherwise
`avx2_kernel` if it contains `"AVX2"`, otherwise `fallback_kernel`.
"""
function select_simd_kernel(features, avx512_kernel, avx2_kernel, fallback_kernel)
    occursin("AVX512", features) && return avx512_kernel
    occursin("AVX2", features) && return avx2_kernel
    return fallback_kernel
end

"""
    parse_likwid_value(output, pattern, offset=1)

Search `output` for `pattern` starting at index `offset` and return the
second capture group parsed as a `Float64`. Throws an error with a
descriptive message if `pattern` does not match.
"""
function parse_likwid_value(output::AbstractString, pattern::Regex, offset::Integer=1)
    m = match(pattern, output, offset)
    m === nothing &&
        error("Could not find a value matching $(pattern) in the LIKWID output")
    return parse(Float64, m.captures[2])
end

"""
    parse_region_metrics(output, region)

Locate the text `"Region \$region"` in the `likwid-perfctr` `output` and
return `(intensity, flops)` read from the first "Operational intensity" and
"DP [MFLOP/s]" entries following it. `flops` is converted to GFlops/s.
Throws an error if the region is not present in `output`.
"""
function parse_region_metrics(output::AbstractString, region::AbstractString)
    header = findfirst("Region $region", output)
    header === nothing &&
        error("Region \"$region\" not found in the likwid-perfctr output")
    offset = last(header)
    flops = mega_to_giga(
        parse_likwid_value(output, r"(DP \[MFLOP/s\]\s+\|\s+)(\d+\.\d+)", offset))
    intensity = parse_likwid_value(output, r"(Operational intensity\s+\|\s+)(\d+\.\d+)",
                                   offset)
    return intensity, flops
end

# measure optimistic peakflops (AVX2 FMA or AVX512 FMA if available)
L1_cache_size = LIKWID.get_cpu_topology().cacheLevels[1].size ÷ 1024 # in kB
cpuinfo = LIKWID.get_cpu_info()
likwid_bench_kernel = select_simd_kernel(cpuinfo.features, "peakflops_avx512_fma",
                                         "peakflops_avx_fma", "peakflops_sse_fma")
max_flops_string = read(`likwid-bench -t $likwid_bench_kernel -W N:$(L1_cache_size)kB:1`, String)
# peak compute rate in GFlops/s
max_flops = mega_to_giga(parse_likwid_value(max_flops_string, r"(MFlops/s:\s+)(\d+\.\d+)"))

# measure optimistic memory bandwidth using reads
likwid_bench_kernel = select_simd_kernel(cpuinfo.features, "load_avx512",
                                         "load_avx", "load_sse")
max_bandwidth_string = read(`likwid-bench -t $likwid_bench_kernel -W N:2GB:1`, String)
# Peak bandwidth in GByte/s. It must use the same prefix as `max_flops`;
# otherwise `intensity * max_bandwidth` is not comparable to `max_flops`.
max_bandwidth = mega_to_giga(parse_likwid_value(max_bandwidth_string, r"(MByte/s:\s+)(\d+\.\d+)"))

## gather data for volume terms implemented in Trixi.jl
measured_string = read(`likwid-perfctr -C 0 -g MEM_DP -m $(Base.julia_cmd()) --check-bounds=no --threads=1 $(joinpath(@__DIR__, "measure_volume_terms.jl"))`, String)
numerical_flux = "flux_shima_etal"

intensity_TreeMesh, flops_TreeMesh =
    parse_region_metrics(measured_string, "TreeMesh-$numerical_flux")
@info "TreeMesh" intensity_TreeMesh flops_TreeMesh

intensity_StructuredMesh, flops_StructuredMesh =
    parse_region_metrics(measured_string, "StructuredMesh-$numerical_flux")
@info "StructuredMesh" intensity_StructuredMesh flops_StructuredMesh

intensity_P4estMesh, flops_P4estMesh =
    parse_region_metrics(measured_string, "P4estMesh-$numerical_flux")
@info "P4estMesh" intensity_P4estMesh flops_P4estMesh
## plot results
"""
    roof(x)

Evaluate the roofline at operational intensity `x`: return
`x * max_bandwidth` while that product is below `max_flops`, and
`max_flops` otherwise. Reads the globals `max_flops` and `max_bandwidth`.
"""
function roof(x)
    bandwidth_bound = x * max_bandwidth
    if max_flops > bandwidth_bound
        return bandwidth_bound
    else
        return max_flops
    end
end
# Upper limit of the x axis. Round *up* (and never below 1) so that the
# roofline always extends past the right-most measurement; rounding to the
# nearest integer could cut the axis off before that point or collapse the
# range to [0, 0] for small intensities.
max_intensity = max(1, ceil(Int, 1.1 *
    max(intensity_TreeMesh, intensity_StructuredMesh, intensity_P4estMesh)))
x = range(0, max_intensity, length=1000)

# roofline curve; assumes `roof` returns values in the units named on the y axis
plt.plot(x, roof.(x), label="Empirical roofline")
plt.xlabel("Operational intensity (Flops/Byte)")
plt.ylabel("Attainable GFlops/sec")

# one marker per mesh type: (operational intensity, measured performance)
plt.scatter(intensity_TreeMesh, flops_TreeMesh, label="TreeMesh")
plt.scatter(intensity_StructuredMesh, flops_StructuredMesh, label="StructuredMesh")
plt.scatter(intensity_P4estMesh, flops_P4estMesh, label="P4estMesh")
plt.legend()

# Turn the CPU name into a file-name-friendly string. `replace` with
# several pattern pairs at once requires Julia 1.7 or newer.
processor_name = replace(cpuinfo.osname, " " => "_", "." => "_", "@" => "",
                         "(R)" => "", "(TM)" => "")
plt.savefig(joinpath(@__DIR__, "roofline_model_$(processor_name).pdf"), bbox_inches="tight")
# This file is machine-generated - editing it directly is not advised
julia_version = "1.7.0-rc3"
manifest_format = "2.0"
[[deps.AbstractFFTs]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.0.1"
[[deps.Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.3.1"
[[deps.ArgCheck]]
git-tree-sha1 = "dedbbb2ddb876f899585c4ec4433265e3017215a"
uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197"
version = "2.1.0"
[[deps.ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[deps.ArrayInterface]]
deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
git-tree-sha1 = "e527b258413e0c6d4f66ade574744c94edef81f8"
uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
version = "3.1.40"
[[deps.ArrayLayouts]]
deps = ["FillArrays", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "e1ba79094cae97b688fb42d31cbbfd63a69706e4"
uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
version = "0.7.8"
[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[deps.BinaryProvider]]
deps = ["Libdl", "Logging", "SHA"]
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.10"
[[deps.BitTwiddlingConvenienceFunctions]]
deps = ["Static"]
git-tree-sha1 = "bc1317f71de8dce26ea67fcdf7eccc0d0693b75b"
uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b"
version = "0.1.1"
[[deps.Blosc]]
deps = ["Blosc_jll"]
git-tree-sha1 = "217da19d6f3a94753e580a8bc241c7cbefd9281f"
uuid = "a74b3585-a348-5f62-a45c-50e91977d574"
version = "0.7.1"
[[deps.Blosc_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Lz4_jll", "Pkg", "Zlib_jll", "Zstd_jll"]
git-tree-sha1 = "e747dac84f39c62aff6956651ec359686490134e"
uuid = "0b7ba130-8d10-5ba8-a3d6-c5182647fed9"
version = "1.21.0+0"
[[deps.BufferedStreams]]
deps = ["Compat", "Test"]
git-tree-sha1 = "5d55b9486590fdda5905c275bb21ce1f0754020f"
uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
version = "1.0.0"
[[deps.CBinding]]
deps = ["Libdl", "Random", "Test"]
git-tree-sha1 = "358e34d0ea8823812d17525fc581fcc332cf4874"
uuid = "d43a6710-96b8-4a2d-833c-c424785e5374"
version = "0.9.4"
[[deps.CEnum]]
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.1"
[[deps.CPUSummary]]
deps = ["Hwloc", "IfElse", "Static"]
git-tree-sha1 = "87b0c9c6ee0124d6c1f4ce8cb035dcaf9f90b803"
uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
version = "0.1.6"
[[deps.ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "f885e7e7c124f8c92650d61b9477b9ac2ee607dd"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.11.1"
[[deps.ChangesOfVariables]]
deps = ["LinearAlgebra", "Test"]
git-tree-sha1 = "9a1d594397670492219635b35a3d830b04730d62"
uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
version = "0.1.1"
[[deps.CloseOpenIntervals]]
deps = ["ArrayInterface", "Static"]
git-tree-sha1 = "7b8f09d58294dc8aa13d91a8544b37c8a1dcbc06"
uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9"
version = "0.1.4"
[[deps.CodeTracking]]
deps = ["InteractiveUtils", "UUIDs"]
git-tree-sha1 = "9aa8a5ebb6b5bf469a7e0e2b5202cf6f8c291104"
uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
version = "1.0.6"
[[deps.CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"]
git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da"
uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
version = "0.7.0"
[[deps.ColorSchemes]]
deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random"]
git-tree-sha1 = "a851fec56cb73cfdf43762999ec72eff5b86882a"
uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
version = "3.15.0"
[[deps.ColorTypes]]
deps = ["FixedPointNumbers", "Random"]
git-tree-sha1 = "024fe24d83e4a5bf5fc80501a314ce0d1aa35597"
uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
version = "0.11.0"
[[deps.Colors]]
deps = ["ColorTypes", "FixedPointNumbers", "Reexport"]
git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40"
uuid = "5ae59095-9a9b-59fe-a467-6f913c188581"
version = "0.12.8"
[[deps.CommonSolve]]
git-tree-sha1 = "68a0743f578349ada8bc911a5cbd5a2ef6ed6d1f"
uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
version = "0.2.0"
[[deps.CommonSubexpressions]]
deps = ["MacroTools", "Test"]
git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7"
uuid = "bbf7d656-a473-5ed7-a52c-81e309532950"
version = "0.3.0"
[[deps.Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "dce3e3fea680869eaa0b774b2e8343e9ff442313"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.40.0"
[[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[deps.Conda]]
deps = ["JSON", "VersionParsing"]
git-tree-sha1 = "299304989a5e6473d985212c28928899c74e9421"
uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
version = "1.5.2"
[[deps.ConstructionBase]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
version = "1.3.0"
[[deps.CovarianceEstimation]]
deps = ["LinearAlgebra", "Statistics", "StatsBase"]
git-tree-sha1 = "bc3930158d2be029e90b7c40d1371c4f54fa04db"
uuid = "587fd27a-f159-11e8-2dae-1979310e6154"
version = "0.2.6"
[[deps.DEDataArrays]]
deps = ["ArrayInterface", "DocStringExtensions", "LinearAlgebra", "RecursiveArrayTools", "SciMLBase", "StaticArrays"]
git-tree-sha1 = "31186e61936fbbccb41d809ad4338c9f7addf7ae"
uuid = "754358af-613d-5f8d-9788-280bf1605d4c"
version = "0.2.0"
[[deps.DSP]]
deps = ["Compat", "FFTW", "IterTools", "LinearAlgebra", "Polynomials", "Random", "Reexport", "SpecialFunctions", "Statistics"]
git-tree-sha1 = "1edc3eb6cd0ec2b5193ac6d37c1b1310044550fe"
uuid = "717857b8-e6f2-59f4-9121-6e50c889abd2"
version = "0.7.3"
[[deps.DataAPI]]
git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.9.0"
[[deps.DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.10"
[[deps.DataValueInterfaces]]
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
version = "1.0.0"
[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[deps.DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
[[deps.DensityInterface]]
deps = ["InverseFunctions", "Test"]
git-tree-sha1 = "80c3e8639e3353e5d2912fb3a1916b8455e2494b"
uuid = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
version = "0.4.0"
[[deps.DiffEqBase]]
deps = ["ArrayInterface", "ChainRulesCore", "DEDataArrays", "DataStructures", "Distributions", "DocStringExtensions", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "IterativeSolvers", "LabelledArrays", "LinearAlgebra", "Logging", "MuladdMacro", "NonlinearSolve", "Parameters", "PreallocationTools", "Printf", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "StaticArrays", "Statistics", "SuiteSparse", "ZygoteRules"]
git-tree-sha1 = "5c3d877ddfc2da61ce5cc1f5ce330ff97789c57c"
uuid = "2b5f629d-d688-5b77-993f-72d75c75574e"
version = "6.76.0"
[[deps.DiffResults]]
deps = ["StaticArrays"]
git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805"
uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
version = "1.0.3"
[[deps.DiffRules]]
deps = ["LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"]
git-tree-sha1 = "3287dacf67c3652d3fed09f4c12c187ae4dbb89a"
uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
version = "1.4.0"
[[deps.Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[deps.Distributions]]
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
git-tree-sha1 = "dc6f530de935bb3c3cd73e99db5b4698e58b2fcf"
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
version = "0.25.31"
[[deps.DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.8.6"
[[deps.Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[deps.EarCut_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "3f3a2501fa7236e9b911e0f7a588c657e822bb6d"
uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5"
version = "2.2.3+0"
[[deps.EllipsisNotation]]
deps = ["ArrayInterface"]
git-tree-sha1 = "9aad812fb7c4c038da7cab5a069f502e6e3ae030"
uuid = "da5c29d0-fa7d-589e-88eb-ea29b0a81949"
version = "1.1.1"
[[deps.ExprTools]]
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.6"
[[deps.FFTW]]
deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"]
git-tree-sha1 = "463cb335fa22c4ebacfd1faba5fde14edb80d96c"
uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
version = "1.4.5"
[[deps.FFTW_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "c6033cc3892d0ef5bb9cd29b7f2f0331ea5184ea"
uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a"
version = "3.3.10+0"
[[deps.FastBroadcast]]
deps = ["LinearAlgebra", "Polyester", "Static"]
git-tree-sha1 = "e32a81c505ab234c992ca978f31ed8b0dabbc327"
uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898"
version = "0.1.11"
[[deps.FastGaussQuadrature]]
deps = ["LinearAlgebra", "SpecialFunctions", "StaticArrays"]
git-tree-sha1 = "5829b25887e53fb6730a9df2ff89ed24baa6abf6"
uuid = "442a2c76-b920-505d-bb47-c5924d526838"
version = "0.4.7"
[[deps.FastTransforms]]
deps = ["AbstractFFTs", "ArrayLayouts", "BinaryProvider", "DSP", "FFTW", "FastGaussQuadrature", "FastTransforms_jll", "FillArrays", "Libdl", "LinearAlgebra", "Reexport", "SpecialFunctions", "Test", "ToeplitzMatrices"]
git-tree-sha1 = "1f1738ba125f37fb9c8518d1e3eea8b80041742f"
uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
version = "0.13.0"
[[deps.FastTransforms_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "FFTW_jll", "JLLWrappers", "Libdl", "MPFR_jll", "OpenBLAS_jll", "Pkg"]
git-tree-sha1 = "176f3f679f8921b3dc2ba127da2f9caf3f6a26eb"
uuid = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
version = "0.5.1+0"
[[deps.FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
git-tree-sha1 = "8756f9935b7ccc9064c6eef0bff0ad643df733a3"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.12.7"
[[deps.FiniteDiff]]
deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"]
git-tree-sha1 = "8b3c09b56acaf3c0e581c66638b85c8650ee9dca"
uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
version = "2.8.1"
[[deps.FixedPointNumbers]]
deps = ["Statistics"]
git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc"
uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
version = "0.8.4"
[[deps.ForwardDiff]]
deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"]
git-tree-sha1 = "6406b5112809c08b1baa5703ad274e1dded0652f"
uuid = "f6369f11-7733-5829-9624-2563aa707210"
version = "0.10.23"
[[deps.FunctionWrappers]]
git-tree-sha1 = "241552bc2209f0fa068b6415b1942cc0aa486bcc"
uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
version = "1.1.2"
[[deps.Future]]
deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
[[deps.GMP_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d"
[[deps.GeometryBasics]]
deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"]
git-tree-sha1 = "58bcdf5ebc057b085e58d95c138725628dd7453c"
uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
version = "0.4.1"
[[deps.HDF5]]
deps = ["Blosc", "Compat", "HDF5_jll", "Libdl", "Mmap", "Random", "Requires"]
git-tree-sha1 = "698c099c6613d7b7f151832868728f426abe698b"
uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
version = "0.15.7"
[[deps.HDF5_jll]]
deps = ["Artifacts", "JLLWrappers", "LibCURL_jll", "Libdl", "OpenSSL_jll", "Pkg", "Zlib_jll"]
git-tree-sha1 = "fd83fa0bde42e01952757f01149dd968c06c4dba"
uuid = "0234f1f7-429e-5d53-9886-15a909be8d59"
version = "1.12.0+1"
[[deps.HostCPUFeatures]]
deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"]
git-tree-sha1 = "8f0dc80088981ab55702b04bba38097a44a1a3a9"
uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
version = "0.1.5"
[[deps.Hwloc]]
deps = ["Hwloc_jll"]
git-tree-sha1 = "92d99146066c5c6888d5a3abc871e6a214388b91"
uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
version = "2.0.0"
[[deps.Hwloc_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "3395d4d4aeb3c9d31f5929d32760d8baeee88aaf"
uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8"
version = "2.5.0+0"
[[deps.IfElse]]
git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1"
uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
version = "0.1.1"
[[deps.InlineStrings]]
deps = ["Parsers"]
git-tree-sha1 = "19cb49649f8c41de7fea32d089d37de917b553da"
uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
version = "1.0.1"
[[deps.IntelOpenMP_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "d979e54b71da82f3a65b62553da4fc3d18c9004c"
uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0"
version = "2018.0.3+2"
[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[deps.Intervals]]
deps = ["Dates", "Printf", "RecipesBase", "Serialization", "TimeZones"]
git-tree-sha1 = "323a38ed1952d30586d0fe03412cde9399d3618b"
uuid = "d8418881-c3e1-53bb-8760-2df7ec849ed5"
version = "1.5.0"
[[deps.InverseFunctions]]
deps = ["Test"]
git-tree-sha1 = "a7254c0acd8e62f1ac75ad24d5db43f5f19f3c65"
uuid = "3587e190-3f89-42d0-90ee-14403ec27112"
version = "0.1.2"
[[deps.IrrationalConstants]]
git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.1"
[[deps.IterTools]]
git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18"
uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
version = "1.3.0"
[[deps.IterativeSolvers]]
deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"]
git-tree-sha1 = "1169632f425f79429f245113b775a0e3d121457c"
uuid = "42fd0dbc-a981-5370-80f2-aaf504508153"
version = "0.9.2"
[[deps.IteratorInterfaceExtensions]]
git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
uuid = "82899510-4779-5014-852e-03e436cf321d"
version = "1.0.0"
[[deps.JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.3.0"
[[deps.JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37"
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
version = "0.21.2"
[[deps.Kronecker]]
deps = ["LinearAlgebra", "NamedDims", "SparseArrays", "StatsBase"]
git-tree-sha1 = "a51f46415c844dee694cb8b20a3fcbe6dba342c2"
uuid = "2c470bb0-bcc8-11e8-3dad-c9649493f05e"
version = "0.5.1"
[[deps.LIKWID]]
deps = ["CEnum", "Libdl", "OrderedCollections", "Unitful"]
git-tree-sha1 = "7aacfb4746520ceafcd393cb76e0e4af2011f20d"
uuid = "bf22376a-e803-4184-b2ed-56326e3bff83"
version = "0.3.0"
[[deps.LaTeXStrings]]
git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996"
uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
version = "1.3.0"
[[deps.LabelledArrays]]
deps = ["ArrayInterface", "ChainRulesCore", "LinearAlgebra", "MacroTools", "StaticArrays"]
git-tree-sha1 = "3609bbf5feba7b22fb35fe7cb207c8c8d2e2fc5b"
uuid = "2ee39098-c373-598a-b85f-a56591580800"
version = "1.6.7"
[[deps.LayoutPointers]]
deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"]
git-tree-sha1 = "83b56449c39342a47f3fcdb3bc782bd6d66e1d97"
uuid = "10f19ff3-798f-405d-979b-55457f8fc047"
version = "0.1.4"
[[deps.LazyArtifacts]]
deps = ["Artifacts", "Pkg"]
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
[[deps.LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[deps.LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[deps.LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[deps.LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[deps.LinearAlgebra]]
deps = ["Libdl", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[deps.LinearMaps]]
deps = ["LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "dbb14c604fc47aa4f2e19d0ebb7b6416f3cfa5f5"
uuid = "7a12625a-238d-50fd-b39a-03d52299707e"
version = "3.5.1"
[[deps.LogExpFunctions]]
deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "be9eef9f9d78cecb6f262f3c10da151a6c5ab827"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.5"
[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[deps.LoopVectorization]]
deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "Requires", "SIMDDualNumbers", "SLEEFPirates", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"]
git-tree-sha1 = "9d8ce46c7727debdfd65be244f22257abf7d8739"
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
version = "0.12.98"
[[deps.Lz4_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "5d494bc6e85c4c9b626ee0cab05daa4085486ab1"
uuid = "5ced341a-0733-55b8-9ab6-a4889d929147"
version = "1.9.3+0"
[[deps.MAT]]
deps = ["BufferedStreams", "CodecZlib", "HDF5", "SparseArrays"]
git-tree-sha1 = "5c62992f3d46b8dce69bdd234279bb5a369db7d5"
uuid = "23992714-dd62-5051-b70f-ba57cb901cac"
version = "0.10.1"
[[deps.MKL_jll]]
deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
git-tree-sha1 = "5455aef09b40e5020e1520f551fa3135040d4ed0"
uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
version = "2021.1.1+2"
[[deps.MPFR_jll]]
deps = ["Artifacts", "GMP_jll", "Libdl"]
uuid = "3a97d323-0669-5f0c-9066-3539efd106a3"
[[deps.MPI]]
deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Pkg", "Random", "Requires", "Serialization", "Sockets"]
git-tree-sha1 = "340d8dc89e1c85a846d3f38ee294bfdd1684055a"
uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195"
version = "0.19.1"
[[deps.MPICH_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "09864c823da1a606dbc151534c1a134fd5506170"
uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4"
version = "3.4.2+1"
[[deps.MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.9"
[[deps.ManualMemory]]
git-tree-sha1 = "9cb207b18148b2199db259adfa923b45593fe08e"
uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667"
version = "0.1.6"
[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[deps.MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[deps.MicrosoftMPI_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "bb2fe65544e6efd883bb2060088df7dfb7273b41"
uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf"
version = "10.1.3+1"
[[deps.Missings]]
deps = ["DataAPI"]
git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "1.0.2"
[[deps.Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[deps.Mocking]]
deps = ["Compat", "ExprTools"]
git-tree-sha1 = "29714d0a7a8083bba8427a4fbfb00a540c681ce7"
uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
version = "0.7.3"
[[deps.MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[deps.MuladdMacro]]
git-tree-sha1 = "c6190f9a7fc5d9d5915ab29f2134421b12d24a68"
uuid = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
version = "0.2.2"
[[deps.MutableArithmetics]]
deps = ["LinearAlgebra", "SparseArrays", "Test"]
git-tree-sha1 = "7bb6853d9afec54019c1397c6eb610b9b9a19525"
uuid = "d8a4904e-b15c-11e9-3269-09a3773c0cb0"
version = "0.3.1"
[[deps.NaNMath]]
git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb"
uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
version = "0.3.5"
[[deps.NamedDims]]
deps = ["AbstractFFTs", "ChainRulesCore", "CovarianceEstimation", "LinearAlgebra", "Pkg", "Requires", "Statistics"]
git-tree-sha1 = "1bb9558fad77d915edd65ef84772a6cd91214346"
uuid = "356022a1-0364-5f58-8944-0da4b18d706f"
version = "0.2.41"
[[deps.NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[deps.NodesAndModes]]
deps = ["DelimitedFiles", "LinearAlgebra", "SpecialFunctions"]
git-tree-sha1 = "487bc8d223d10ad6465d52c6a64c5eabfa03fb66"
uuid = "7aca2e03-f7e2-4192-9ec8-f4ca66d597fb"
version = "0.7.0"
[[deps.NonlinearSolve]]
deps = ["ArrayInterface", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"]
git-tree-sha1 = "e9ffc92217b8709e0cf7b8808f6223a4a0936c95"
uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
version = "0.3.11"
[[deps.Octavian]]
deps = ["ArrayInterface", "IfElse", "LoopVectorization", "ManualMemory", "PolyesterWeave", "Requires", "Static", "ThreadingUtilities", "VectorizationBase"]
git-tree-sha1 = "edc8b839c99c3ceee493b2a6c689f201ddeb4e3f"
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
version = "0.3.8"
[[deps.OffsetArrays]]
deps = ["Adapt"]
git-tree-sha1 = "043017e0bdeff61cfbb7afeb558ab29536bbb5ed"
uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
version = "1.10.8"
[[deps.OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
[[deps.OpenLibm_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
[[deps.OpenMPI_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "872077914c8a8cab9ea1430f338ae6b59258577d"
uuid = "fe0851c0-eecd-5654-98d4-656369965a5c"
version = "4.1.1+3"
[[deps.OpenSSL_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a"
uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
version = "1.1.10+0"
[[deps.OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.5+0"
[[deps.OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[deps.P4est]]
deps = ["CBinding", "MPI", "P4est_jll", "Reexport"]
git-tree-sha1 = "da29061ec4771a2d70b927ccd440711a92a08d76"
uuid = "7d669430-f675-4ae7-b43e-fab78ec5a902"
version = "0.2.3"
[[deps.P4est_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
git-tree-sha1 = "fe321634537ea1198bed0526ba1b4811e5a80fae"
uuid = "6b5a15aa-cf52-5330-8376-5e5d90283449"
version = "2.3.2+0"
[[deps.PDMats]]
deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
git-tree-sha1 = "ee26b350276c51697c9c2d88a072b339f9f03d73"
uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
version = "0.11.5"
[[deps.Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.3"
[[deps.Parsers]]
deps = ["Dates"]
git-tree-sha1 = "ae4bbcadb2906ccc085cf52ac286dc1377dceccc"
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
version = "2.1.2"
[[deps.Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[deps.PlotUtils]]
deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"]
git-tree-sha1 = "b084324b4af5a438cd63619fd006614b3b20b87b"
uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043"
version = "1.0.15"
[[deps.Polyester]]
deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"]
git-tree-sha1 = "892b8d9dd3c7987a4d0fd320f0a421dd90b5d09d"
uuid = "f517fe37-dbe3-4b94-8317-1923a5111588"
version = "0.5.4"
[[deps.PolyesterWeave]]
deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"]
git-tree-sha1 = "a3ff99bf561183ee20386aec98ab8f4a12dc724a"
uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
version = "0.1.2"
[[deps.PolynomialBases]]
deps = ["ArgCheck", "FastGaussQuadrature", "FastTransforms", "LinearAlgebra", "Parameters", "Requires", "SpecialFunctions"]
git-tree-sha1 = "120a30284e0a7686852f76d23c49f506e1e5ba49"
uuid = "c74db56a-226d-5e98-8bb0-a6049094aeea"
version = "0.4.10"
[[deps.Polynomials]]
deps = ["Intervals", "LinearAlgebra", "MutableArithmetics", "RecipesBase"]
git-tree-sha1 = "79bcbb379205f1c62913fa9ebecb413c7a35f8b0"
uuid = "f27b6e38-b328-58d1-80ce-0feddd5e7a45"
version = "2.0.18"
[[deps.PreallocationTools]]
deps = ["Adapt", "ArrayInterface", "ForwardDiff", "LabelledArrays"]
git-tree-sha1 = "ba819074442cd4c9bda1a3d905ec305f8acb37f2"
uuid = "d236fae5-4411-538c-8e31-a6e3d9e00b46"
version = "0.2.0"
[[deps.Preferences]]
deps = ["TOML"]
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.2.2"
[[deps.Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[deps.PyCall]]
deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"]
git-tree-sha1 = "4ba3651d33ef76e24fef6a598b63ffd1c5e1cd17"
uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
version = "1.92.5"
[[deps.PyPlot]]
deps = ["Colors", "LaTeXStrings", "PyCall", "Sockets", "Test", "VersionParsing"]
git-tree-sha1 = "14c1b795b9d764e1784713941e787e1384268103"
uuid = "d330b81b-6aea-500a-939a-2ce795aea3ee"
version = "2.10.0"
[[deps.QuadGK]]
deps = ["DataStructures", "LinearAlgebra"]
git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
version = "2.4.2"
[[deps.REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[deps.Random]]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[deps.RecipesBase]]
git-tree-sha1 = "44a75aa7a527910ee3d1751d1f0e4148698add9e"
uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
version = "1.1.2"
[[deps.RecursiveArrayTools]]
deps = ["ArrayInterface", "ChainRulesCore", "DocStringExtensions", "FillArrays", "LinearAlgebra", "RecipesBase", "Requires", "StaticArrays", "Statistics", "ZygoteRules"]
git-tree-sha1 = "c944fa4adbb47be43376359811c0a14757bdc8a8"
uuid = "731186ca-8d62-57ce-b412-fbd966d074cd"
version = "2.20.0"
[[deps.RecursiveFactorization]]
deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "StrideArraysCore", "TriangularSolve"]
git-tree-sha1 = "b7edd69c796b30985ea6dfeda8504cdb7cf77e9f"
uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
version = "0.2.5"
[[deps.Reexport]]
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "1.2.2"
[[deps.Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.1.3"
[[deps.Rmath]]
deps = ["Random", "Rmath_jll"]
git-tree-sha1 = "bf3188feca147ce108c76ad82c2792c57abe7b1f"
uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
version = "0.7.0"
[[deps.Rmath_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7"
uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f"
version = "0.3.0+0"
[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[deps.SIMDDualNumbers]]
deps = ["ForwardDiff", "IfElse", "SLEEFPirates", "VectorizationBase"]
git-tree-sha1 = "62c2da6eb66de8bb88081d20528647140d4daa0e"
uuid = "3cdde19b-5bb0-4aaf-8931-af3e248e098b"
version = "0.1.0"
[[deps.SIMDTypes]]
git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c"
uuid = "94e857df-77ce-4151-89e5-788b33177be4"
version = "0.1.0"
[[deps.SLEEFPirates]]
deps = ["IfElse", "Static", "VectorizationBase"]
git-tree-sha1 = "1410aad1c6b35862573c01b96cd1f6dbe3979994"
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
version = "0.6.28"
[[deps.SciMLBase]]
deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "RecipesBase", "RecursiveArrayTools", "StaticArrays", "Statistics", "Tables", "TreeViews"]
git-tree-sha1 = "b3d23aa4e5f621b574b3b0d41c62c8624d27192a"
uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
version = "1.19.5"
[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[deps.Setfield]]
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
git-tree-sha1 = "def0718ddbabeb5476e51e5a43609bee889f285d"
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
version = "0.8.0"
[[deps.SharedArrays]]
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
[[deps.Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[deps.SortingAlgorithms]]
deps = ["DataStructures"]
git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508"
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
version = "1.0.1"
[[deps.SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[deps.SpecialFunctions]]
deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
git-tree-sha1 = "f0bccf98e16759818ffc5d97ac3ebf87eb950150"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.8.1"
[[deps.StartUpDG]]
deps = ["Colors", "ConstructionBase", "Kronecker", "LinearAlgebra", "MAT", "NodesAndModes", "Printf", "RecipesBase", "Reexport", "Requires", "Setfield", "SparseArrays", "StaticArrays", "UnPack"]
git-tree-sha1 = "be47f5b52f849f8a9eafb701fbbbc9c131592bcb"
uuid = "472ebc20-7c99-4d4b-9470-8fde4e9faa0f"
version = "0.12.2"
[[deps.Static]]
deps = ["IfElse"]
git-tree-sha1 = "e7bc80dc93f50857a5d1e3c8121495852f407e6a"
uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
version = "0.4.0"
[[deps.StaticArrays]]
deps = ["LinearAlgebra", "Random", "Statistics"]
git-tree-sha1 = "3c76dde64d03699e074ac02eb2e8ba8254d428da"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.2.13"
[[deps.Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[deps.StatsAPI]]
git-tree-sha1 = "0f2aa8e32d511f758a2ce49208181f7733a0936a"
uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
version = "1.1.0"
[[deps.StatsBase]]
deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
git-tree-sha1 = "2bb0cb32026a66037360606510fca5984ccc6b75"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
version = "0.33.13"
[[deps.StatsFuns]]
deps = ["ChainRulesCore", "InverseFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"]
git-tree-sha1 = "385ab64e64e79f0cd7cfcf897169b91ebbb2d6c8"
uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
version = "0.9.13"
[[deps.StrideArrays]]
deps = ["ArrayInterface", "LinearAlgebra", "LoopVectorization", "Octavian", "Random", "SLEEFPirates", "Static", "StrideArraysCore", "ThreadingUtilities", "VectorizationBase", "VectorizedRNG"]
git-tree-sha1 = "f9da1aec82a57e48bc30f35c0a551406f5baa8e3"
uuid = "d1fa6d79-ef01-42a6-86c9-f7c551f8593b"
version = "0.1.15"
[[deps.StrideArraysCore]]
deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "Requires", "SIMDTypes", "Static", "ThreadingUtilities"]
git-tree-sha1 = "12cf3253ebd8e2a3214ae171fbfe51e7e8d8ad28"
uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da"
version = "0.2.9"
[[deps.StructArrays]]
deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"]
git-tree-sha1 = "2ce41e0d042c60ecd131e9fb7154a3bfadbf50d3"
uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
version = "0.6.3"
[[deps.SuiteSparse]]
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
[[deps.SummationByPartsOperators]]
deps = ["ArgCheck", "ArrayInterface", "DiffEqBase", "FFTW", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "PolynomialBases", "RecursiveArrayTools", "Reexport", "Requires", "SparseArrays", "StaticArrays", "UnPack", "Unrolled"]
git-tree-sha1 = "7706477682d5b4eaa5358d7f6909c4b154415c33"
uuid = "9f78cca6-572e-554e-b819-917d2f1cf240"
version = "0.5.7"
[[deps.TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[deps.TableTraits]]
deps = ["IteratorInterfaceExtensions"]
git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
version = "1.0.1"
[[deps.Tables]]
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
git-tree-sha1 = "fed34d0e71b91734bf0a7e10eb1bb05296ddbcd0"
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
version = "1.6.0"
[[deps.Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[deps.Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[deps.ThreadingUtilities]]
deps = ["ManualMemory"]
git-tree-sha1 = "03013c6ae7f1824131b2ae2fc1d49793b51e8394"
uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5"
version = "0.4.6"
[[deps.TimeZones]]
deps = ["Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Pkg", "Printf", "RecipesBase", "Serialization", "Unicode"]
git-tree-sha1 = "8de32288505b7db196f36d27d7236464ef50dba1"
uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53"
version = "1.6.2"
[[deps.TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "7cb456f358e8f9d102a8b25e8dfedf58fa5689bc"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.13"
[[deps.ToeplitzMatrices]]
deps = ["AbstractFFTs", "LinearAlgebra", "StatsBase"]
git-tree-sha1 = "b61dc0269afe4c4e6109cee4d4098121bf59a8d0"
uuid = "c751599d-da0a-543b-9d20-d0a503d91d24"
version = "0.7.0"
[[deps.TranscodingStreams]]
deps = ["Random", "Test"]
git-tree-sha1 = "216b95ea110b5972db65aa90f88d8d89dcb8851c"
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.9.6"
[[deps.TreeViews]]
deps = ["Test"]
git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6"
uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7"
version = "0.3.0"
[[deps.Triangle_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "bfdd9ef1004eb9d407af935a6f36a4e0af711369"
uuid = "5639c1d2-226c-5e70-8d55-b3095415a16a"
version = "1.6.1+0"
[[deps.TriangularSolve]]
deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"]
git-tree-sha1 = "ec9a310324dd2c546c07f33a599ded9c1d00a420"
uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
version = "0.1.8"
[[deps.Triangulate]]
deps = ["DocStringExtensions", "Libdl", "Printf", "Test", "Triangle_jll"]
git-tree-sha1 = "2b4f716b192c0c615d96d541ee029e85666388cb"
uuid = "f7e6ffb2-c36d-4f8f-a77e-16e897189344"
version = "2.1.0"
[[deps.TriplotBase]]
git-tree-sha1 = "4d4ed7f294cda19382ff7de4c137d24d16adc89b"
uuid = "981d1d27-644d-49a2-9326-4793e63143c3"
version = "0.1.0"
[[deps.TriplotRecipes]]
deps = ["PlotUtils", "RecipesBase", "TriplotBase"]
git-tree-sha1 = "ee1303fdb2c712b96efec8db661750adc96aa1b3"
uuid = "808ab39a-a642-4abf-81ff-4cb34ebbffa3"
version = "0.1.0"
[[deps.Trixi]]
deps = ["CodeTracking", "DiffEqBase", "EllipsisNotation", "ForwardDiff", "GeometryBasics", "HDF5", "LinearAlgebra", "LinearMaps", "LoopVectorization", "MPI", "Octavian", "OffsetArrays", "P4est", "Polyester", "Printf", "RecipesBase", "Reexport", "Requires", "Setfield", "SparseArrays", "StartUpDG", "Static", "StaticArrays", "StrideArrays", "StructArrays", "SummationByPartsOperators", "TimerOutputs", "Triangulate", "TriplotBase", "TriplotRecipes", "UnPack"]
git-tree-sha1 = "dc8a7fa4e83e8c603a260be110192130c2e32cfb"
uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
version = "0.4.10"
[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[deps.UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"
[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[deps.Unitful]]
deps = ["ConstructionBase", "Dates", "LinearAlgebra", "Random"]
git-tree-sha1 = "0992ed0c3ef66b0390e5752fe60054e5ff93b908"
uuid = "1986cc42-f94f-5a68-af5c-568840ba703d"
version = "1.9.2"
[[deps.Unrolled]]
deps = ["MacroTools", "Test"]
git-tree-sha1 = "3c2a0b4fc6b48d777fb42fca38ba1d5f54f3e38d"
uuid = "9602ed7d-8fef-5bc8-8597-8f21381861e8"
version = "0.1.3"
[[deps.VectorizationBase]]
deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "Hwloc", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"]
git-tree-sha1 = "5239606cf3552aff43d79ecc75b1af1ce4625109"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
version = "0.21.21"
[[deps.VectorizedRNG]]
deps = ["Distributed", "Random", "UnPack", "VectorizationBase"]
git-tree-sha1 = "98002b227603e41e4aac8dc6ea5a19ccf25d191d"
uuid = "33b4df10-0173-11e9-2a0c-851a7edac40e"
version = "0.2.13"
[[deps.VersionParsing]]
git-tree-sha1 = "e575cf85535c7c3292b4d89d89cc29e8c3098e47"
uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
version = "1.2.1"
[[deps.Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[deps.Zstd_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6"
uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
version = "1.5.0+0"
[[deps.ZygoteRules]]
deps = ["MacroTools"]
git-tree-sha1 = "8c1a8e4dfacb1fd631745552c8db35d0deb09ea0"
uuid = "700de1a5-db45-46bc-99cf-38207098b444"
version = "0.2.2"
[[deps.libblastrampoline_jll]]
deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
[[deps.nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[deps.p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
## Run this file using LIKWID:
# ```bash
# likwid-perfctr -C 0 -g MEM_DP -m julia --check-bounds=no --threads=1 measure_volume_terms.jl
# ```
# You can also inspect other groups, e.g., `-g FLOPS_DP`.
## activate project environment
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()
## load packages
using LIKWID
using Trixi
## set up benchmark code
"""
    initial_condition_isentropic_vortex(x, t, equations)

Initial condition of the classical isentropic vortex test case, see
- Chi-Wang Shu (1997)
  Essentially Non-Oscillatory and Weighted Essentially Non-Oscillatory
  Schemes for Hyperbolic Conservation Laws.
  [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543)

The primitive variables are evaluated at the position `x` and the time `t` and
returned after passing them through `prim2cons`.
"""
function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D)
    # background state
    rho_bg = 1.0
    vel_bg = SVector(1.0, 1.0)
    p_bg = 10.0
    T_bg = p_bg / rho_bg

    # vortex and domain parameters
    strength = 20.0       # vortex strength
    domain_length = 10.0  # size of the domain per coordinate direction
    gamma = equations.gamma

    # offset of `x` from the vortex center, which is located at `vel_bg * t`
    center = vel_bg * t
    offset = vortex_center.(x - center, domain_length)
    dist2 = sum(abs2, offset)

    # perturbed primitive variables
    T_drop = (gamma - 1) * strength^2 / (8 * gamma * π^2) * exp(1 - dist2)
    T = T_bg - T_drop
    swirl = strength / (2 * π) * exp(0.5 * (1 - dist2))
    vel = vel_bg + swirl * SVector(-offset[2], offset[1])
    rho = rho_bg * (T / T_bg)^(1 / (gamma - 1))
    p = rho * T

    prim = SVector(rho, vel..., p)
    return prim2cons(prim, equations)
end
# Three-dimensional variant of the isentropic vortex: the background velocity
# and the velocity perturbation both have a vanishing third component.
function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations3D)
    # background state
    rho_bg = 1.0
    vel_bg = SVector(1.0, 1.0, 0.0)
    p_bg = 10.0
    T_bg = p_bg / rho_bg

    # vortex and domain parameters
    strength = 20.0       # vortex strength
    domain_length = 10.0  # size of the domain per coordinate direction
    gamma = equations.gamma

    # offset of `x` from the vortex center, which is located at `vel_bg * t`
    center = vel_bg * t
    offset = vortex_center.(x - center, domain_length)
    dist2 = sum(abs2, offset)

    # perturbed primitive variables
    T_drop = (gamma - 1) * strength^2 / (8 * gamma * π^2) * exp(1 - dist2)
    T = T_bg - T_drop
    swirl = strength / (2 * π) * exp(0.5 * (1 - dist2))
    vel = vel_bg + swirl * SVector(-offset[2], offset[1], 0.0)
    rho = rho_bg * (T / T_bg)^(1 / (gamma - 1))
    p = rho * T

    prim = SVector(rho, vel..., p)
    return prim2cons(prim, equations)
end
# Wrap the coordinate `x` periodically with period `L`; for positive `L`, the
# result lies in the interval `[-L/2, L/2)`.
function vortex_center(x, L)
    half_length = L/2
    return mod(x + half_length, L) - half_length
end
# Call `Trixi.calc_volume_integral!` `n` times in a row with `du`, `u`, and the
# mesh, equations, solver, and cache stored in `semi`.
# The argument `t` is accepted but not used.
function many_volume_terms!(du, u, semi, t, n)
    mesh, equations, solver, cache = semi.mesh, semi.equations, semi.solver, semi.cache
    has_nonconservative_terms = Trixi.have_nonconservative_terms(equations)

    for _ in 1:n
        Trixi.calc_volume_integral!(du, u, mesh,
                                    has_nonconservative_terms, equations,
                                    solver.volume_integral, solver, cache)
    end
end
# Build a semidiscretization on `mesh`, evaluate the volume terms once to
# trigger compilation, pause for a second, and then evaluate them `n` times
# inside the LIKWID marker region `region_name`.
function _measure_volume_terms(region_name, mesh, equations, solver, t, n)
    semi = SemidiscretizationHyperbolic(mesh, equations,
                                        initial_condition_isentropic_vortex, solver)
    u_ode = Trixi.compute_coefficients(t, semi)
    du_ode = zero(u_ode)

    # keep `u_ode` and `du_ode` preserved from garbage collection while their
    # wrapped versions `u` and `du` are in use
    GC.@preserve u_ode du_ode begin
        u = Trixi.wrap_array(u_ode, semi)
        du = Trixi.wrap_array(du_ode, semi)

        # compile and cool down
        many_volume_terms!(du, u, semi, t, 1)
        sleep(1.0)

        @region region_name begin
            many_volume_terms!(du, u, semi, t, n)
        end
    end

    return nothing
end

"""
    run_measurements(polydeg=3, n=5*10^3)

Measure the volume terms of a `DGSEM` solver with polynomial degree `polydeg`
and `VolumeIntegralFluxDifferencing` for the 3D compressible Euler equations.

For each of the fluxes `flux_shima_etal` and `flux_ranocha`, the volume terms
are evaluated `n` times on a `TreeMesh`, a `StructuredMesh`, and a `P4estMesh`.
Every combination is wrapped in its own LIKWID marker region named
`"<MeshType>-<flux>"`, e.g. `"TreeMesh-flux_shima_etal"`.

The LIKWID marker API is initialized at the beginning and closed at the end,
also if one of the measurements throws an error.
"""
function run_measurements(polydeg=3, n=5*10^3)
    equations = CompressibleEulerEquations3D(1.4)

    # mesh parameters shared by all measurements
    coordinates_min = (-5.0, -5.0, -5.0)
    coordinates_max = ( 5.0,  5.0,  5.0)
    initial_refinement_level = 3
    cells_per_dimension = 2^initial_refinement_level .* (1, 1, 1)
    t = 0.0

    Marker.init()
    try
        for numerical_flux in (flux_shima_etal, flux_ranocha)
            solver = DGSEM(polydeg=polydeg, surface_flux=numerical_flux,
                           volume_integral=VolumeIntegralFluxDifferencing(numerical_flux))

            # TreeMesh
            _measure_volume_terms("TreeMesh-$(numerical_flux)",
                                  TreeMesh(coordinates_min, coordinates_max,
                                           initial_refinement_level=initial_refinement_level,
                                           n_cells_max=100_000, periodicity=true),
                                  equations, solver, t, n)

            # StructuredMesh
            _measure_volume_terms("StructuredMesh-$(numerical_flux)",
                                  StructuredMesh(cells_per_dimension,
                                                 coordinates_min, coordinates_max,
                                                 periodicity=true),
                                  equations, solver, t, n)

            # P4estMesh
            _measure_volume_terms("P4estMesh-$(numerical_flux)",
                                  P4estMesh(cells_per_dimension;
                                            coordinates_min, coordinates_max,
                                            polydeg=1, periodicity=true),
                                  equations, solver, t, n)
        end
    finally
        # always close the marker API, even if a measurement failed
        Marker.close()
    end

    return nothing
end
# run the measurements with the default arguments of `run_measurements`
run_measurements()
[deps]
LIKWID = "bf22376a-e803-4184-b2ed-56326e3bff83"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
@carstenbauer
Copy link

First run:

➜  bauerc@cn-0252 trixi  sh measure_volume_terms.sh
--------------------------------------------------------------------------------
CPU name:   Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
CPU type:   Intel Skylake SP processor
CPU clock:  2.39 GHz
Warning: The Marker API requires the application to run on the selected CPUs.
Warning: likwid-perfctr pins the application only when using the -C command line option.
Warning: LIKWID assumes that the application does it before the first instrumented code region is started.
Warning: You can use the string in the environment variable LIKWID_THREADS to pin you application to
Warning: to the CPUs specified after the -c command line option.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Region TreeMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |   8.577383 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 38438470000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 19130500000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 13279500000 |
|              PWR_PKG_ENERGY              |   PWR0  |    906.3293 |
|              PWR_DRAM_ENERGY             |   PWR3  |    101.7574 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  1492880000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 22987480000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2136604000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      247394 |
|               CAS_COUNT_WR               | MBOX0C1 |      127800 |
|               CAS_COUNT_RD               | MBOX1C0 |      279098 |
|               CAS_COUNT_WR               | MBOX1C1 |      154338 |
|               CAS_COUNT_RD               | MBOX2C0 |      269569 |
|               CAS_COUNT_WR               | MBOX2C1 |      145546 |
|               CAS_COUNT_RD               | MBOX3C0 |      295024 |
|               CAS_COUNT_WR               | MBOX3C1 |      173337 |
|               CAS_COUNT_RD               | MBOX4C0 |      272878 |
|               CAS_COUNT_WR               | MBOX4C1 |      153693 |
|               CAS_COUNT_RD               | MBOX5C0 |      276636 |
|               CAS_COUNT_WR               | MBOX5C1 |      153695 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |     8.5774 |
|        Runtime unhalted [s]       |     7.9899 |
|            Clock [MHz]            |  3449.2724 |
|                CPI                |     0.4977 |
|             Energy [J]            |   906.3293 |
|             Power [W]             |   105.6650 |
|          Energy DRAM [J]          |   101.7574 |
|           Power DRAM [W]          |    11.8635 |
|            DP [MFLOP/s]           |  4024.4974 |
|          AVX DP [MFLOP/s]         |   996.3897 |
|          Packed [MUOPS/s]         |   423.1458 |
|          Scalar [MUOPS/s]         |  2680.0109 |
|  Memory read bandwidth [MBytes/s] |    12.2413 |
|  Memory read data volume [GBytes] |     0.1050 |
| Memory write bandwidth [MBytes/s] |     6.7781 |
| Memory write data volume [GBytes] |     0.0581 |
|    Memory bandwidth [MBytes/s]    |    19.0194 |
|    Memory data volume [GBytes]    |     0.1631 |
|       Operational intensity       |   211.5998 |
+-----------------------------------+------------+

Region StructuredMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  11.981190 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 57810190000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 27180600000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 19001460000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1267.6320 |
|              PWR_DRAM_ENERGY             |   PWR3  |    142.3140 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  5082036000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 29210620000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2173360000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      263960 |
|               CAS_COUNT_WR               | MBOX0C1 |      117604 |
|               CAS_COUNT_RD               | MBOX1C0 |      261094 |
|               CAS_COUNT_WR               | MBOX1C1 |      118570 |
|               CAS_COUNT_RD               | MBOX2C0 |      269790 |
|               CAS_COUNT_WR               | MBOX2C1 |      116195 |
|               CAS_COUNT_RD               | MBOX3C0 |      267817 |
|               CAS_COUNT_WR               | MBOX3C1 |      124789 |
|               CAS_COUNT_RD               | MBOX4C0 |      272312 |
|               CAS_COUNT_WR               | MBOX4C1 |      123023 |
|               CAS_COUNT_RD               | MBOX5C0 |      259922 |
|               CAS_COUNT_WR               | MBOX5C1 |      122048 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    11.9812 |
|        Runtime unhalted [s]       |    11.3521 |
|            Clock [MHz]            |  3424.9557 |
|                CPI                |     0.4702 |
|             Energy [J]            |  1267.6320 |
|             Power [W]             |   105.8018 |
|          Energy DRAM [J]          |   142.3140 |
|           Power DRAM [W]          |    11.8781 |
|            DP [MFLOP/s]           |  4011.9664 |
|          AVX DP [MFLOP/s]         |   725.5907 |
|          Packed [MUOPS/s]         |   605.5656 |
|          Scalar [MUOPS/s]         |  2438.0400 |
|  Memory read bandwidth [MBytes/s] |     8.5195 |
|  Memory read data volume [GBytes] |     0.1021 |
| Memory write bandwidth [MBytes/s] |     3.8579 |
| Memory write data volume [GBytes] |     0.0462 |
|    Memory bandwidth [MBytes/s]    |    12.3774 |
|    Memory data volume [GBytes]    |     0.1483 |
|       Operational intensity       |   324.1365 |
+-----------------------------------+------------+

Region P4estMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  12.193580 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 58393090000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 27846640000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 19516550000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1287.3690 |
|              PWR_DRAM_ENERGY             |   PWR3  |    144.6812 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  5122374000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 29418950000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2187446000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      326712 |
|               CAS_COUNT_WR               | MBOX0C1 |      120349 |
|               CAS_COUNT_RD               | MBOX1C0 |      326120 |
|               CAS_COUNT_WR               | MBOX1C1 |      121513 |
|               CAS_COUNT_RD               | MBOX2C0 |      322341 |
|               CAS_COUNT_WR               | MBOX2C1 |      119407 |
|               CAS_COUNT_RD               | MBOX3C0 |      330289 |
|               CAS_COUNT_WR               | MBOX3C1 |      134662 |
|               CAS_COUNT_RD               | MBOX4C0 |      326526 |
|               CAS_COUNT_WR               | MBOX4C1 |      127852 |
|               CAS_COUNT_RD               | MBOX5C0 |      328377 |
|               CAS_COUNT_WR               | MBOX5C1 |      130973 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    12.1936 |
|        Runtime unhalted [s]       |    11.6303 |
|            Clock [MHz]            |  3416.2736 |
|                CPI                |     0.4769 |
|             Energy [J]            |  1287.3690 |
|             Power [W]             |   105.5776 |
|          Energy DRAM [J]          |   144.6812 |
|           Power DRAM [W]          |    11.8654 |
|            DP [MFLOP/s]           |  3970.4075 |
|          AVX DP [MFLOP/s]         |   717.5730 |
|          Packed [MUOPS/s]         |   599.4810 |
|          Scalar [MUOPS/s]         |  2412.6590 |
|  Memory read bandwidth [MBytes/s] |    10.2893 |
|  Memory read data volume [GBytes] |     0.1255 |
| Memory write bandwidth [MBytes/s] |     3.9615 |
| Memory write data volume [GBytes] |     0.0483 |
|    Memory bandwidth [MBytes/s]    |    14.2508 |
|    Memory data volume [GBytes]    |     0.1738 |
|       Operational intensity       |   278.6103 |
+-----------------------------------+------------+

Region TreeMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  18.557590 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 84123930000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 44661370000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 31945940000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1957.7660 |
|              PWR_DRAM_ENERGY             |   PWR3  |    220.7569 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  2222158000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 44276040000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2304596000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      365987 |
|               CAS_COUNT_WR               | MBOX0C1 |      149223 |
|               CAS_COUNT_RD               | MBOX1C0 |      366772 |
|               CAS_COUNT_WR               | MBOX1C1 |      150810 |
|               CAS_COUNT_RD               | MBOX2C0 |      364672 |
|               CAS_COUNT_WR               | MBOX2C1 |      146737 |
|               CAS_COUNT_RD               | MBOX3C0 |      374857 |
|               CAS_COUNT_WR               | MBOX3C1 |      167229 |
|               CAS_COUNT_RD               | MBOX4C0 |      370204 |
|               CAS_COUNT_WR               | MBOX4C1 |      160590 |
|               CAS_COUNT_RD               | MBOX5C0 |      370343 |
|               CAS_COUNT_WR               | MBOX5C1 |      161459 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    18.5576 |
|        Runtime unhalted [s]       |    18.6530 |
|            Clock [MHz]            |  3347.3357 |
|                CPI                |     0.5309 |
|             Energy [J]            |  1957.7660 |
|             Power [W]             |   105.4968 |
|          Energy DRAM [J]          |   220.7569 |
|           Power DRAM [W]          |    11.8958 |
|            DP [MFLOP/s]           |  3122.1048 |
|          AVX DP [MFLOP/s]         |   496.7447 |
|          Packed [MUOPS/s]         |   243.9301 |
|          Scalar [MUOPS/s]         |  2385.8723 |
|  Memory read bandwidth [MBytes/s] |     7.6315 |
|  Memory read data volume [GBytes] |     0.1416 |
| Memory write bandwidth [MBytes/s] |     3.2282 |
| Memory write data volume [GBytes] |     0.0599 |
|    Memory bandwidth [MBytes/s]    |    10.8596 |
|    Memory data volume [GBytes]    |     0.2015 |
|       Operational intensity       |   287.4965 |
+-----------------------------------+------------+

Region StructuredMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  22.719550 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 97667380000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 50655200000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 35154480000 |
|              PWR_PKG_ENERGY              |   PWR0  |   2395.1280 |
|              PWR_DRAM_ENERGY             |   PWR3  |    269.8805 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |    31227780 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 56438710000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2135330000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      806709 |
|               CAS_COUNT_WR               | MBOX0C1 |      415733 |
|               CAS_COUNT_RD               | MBOX1C0 |      801937 |
|               CAS_COUNT_WR               | MBOX1C1 |      413534 |
|               CAS_COUNT_RD               | MBOX2C0 |      818344 |
|               CAS_COUNT_WR               | MBOX2C1 |      411252 |
|               CAS_COUNT_RD               | MBOX3C0 |      822079 |
|               CAS_COUNT_WR               | MBOX3C1 |      430310 |
|               CAS_COUNT_RD               | MBOX4C0 |      800649 |
|               CAS_COUNT_WR               | MBOX4C1 |      422065 |
|               CAS_COUNT_RD               | MBOX5C0 |      797423 |
|               CAS_COUNT_WR               | MBOX5C1 |      420067 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    22.7196 |
|        Runtime unhalted [s]       |    21.1564 |
|            Clock [MHz]            |  3450.0568 |
|                CPI                |     0.5187 |
|             Energy [J]            |  2395.1280 |
|             Power [W]             |   105.4215 |
|          Energy DRAM [J]          |   269.8805 |
|           Power DRAM [W]          |    11.8788 |
|            DP [MFLOP/s]           |  2862.8422 |
|          AVX DP [MFLOP/s]         |   375.9458 |
|          Packed [MUOPS/s]         |    95.3609 |
|          Scalar [MUOPS/s]         |  2484.1474 |
|  Memory read bandwidth [MBytes/s] |    13.6542 |
|  Memory read data volume [GBytes] |     0.3102 |
| Memory write bandwidth [MBytes/s] |     7.0789 |
| Memory write data volume [GBytes] |     0.1608 |
|    Memory bandwidth [MBytes/s]    |    20.7331 |
|    Memory data volume [GBytes]    |     0.4710 |
|       Operational intensity       |   138.0808 |
+-----------------------------------+------------+

Region P4estMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  22.855380 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+--------------+
|                   Event                  | Counter |  HWThread 0  |
+------------------------------------------+---------+--------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 107168900000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  |  55841850000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  |  40168140000 |
|              PWR_PKG_ENERGY              |   PWR0  |    2410.4210 |
|              PWR_DRAM_ENERGY             |   PWR3  |     269.8510 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |     35017400 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  |  62533940000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |   2339502000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |            0 |
|               CAS_COUNT_RD               | MBOX0C0 |       620709 |
|               CAS_COUNT_WR               | MBOX0C1 |       273374 |
|               CAS_COUNT_RD               | MBOX1C0 |       660895 |
|               CAS_COUNT_WR               | MBOX1C1 |       277235 |
|               CAS_COUNT_RD               | MBOX2C0 |       632146 |
|               CAS_COUNT_WR               | MBOX2C1 |       272792 |
|               CAS_COUNT_RD               | MBOX3C0 |       653274 |
|               CAS_COUNT_WR               | MBOX3C1 |       294368 |
|               CAS_COUNT_RD               | MBOX4C0 |       620983 |
|               CAS_COUNT_WR               | MBOX4C1 |       284182 |
|               CAS_COUNT_RD               | MBOX5C0 |       647905 |
|               CAS_COUNT_WR               | MBOX5C1 |       286186 |
+------------------------------------------+---------+--------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    22.8554 |
|        Runtime unhalted [s]       |    23.3226 |
|            Clock [MHz]            |  3328.5951 |
|                CPI                |     0.5211 |
|             Energy [J]            |  2410.4210 |
|             Power [W]             |   105.4641 |
|          Energy DRAM [J]          |   269.8510 |
|           Power DRAM [W]          |    11.8069 |
|            DP [MFLOP/s]           |  3148.5796 |
|          AVX DP [MFLOP/s]         |   409.4444 |
|          Packed [MUOPS/s]         |   103.8932 |
|          Scalar [MUOPS/s]         |  2736.0709 |
|  Memory read bandwidth [MBytes/s] |    10.7414 |
|  Memory read data volume [GBytes] |     0.2455 |
| Memory write bandwidth [MBytes/s] |     4.7271 |
| Memory write data volume [GBytes] |     0.1080 |
|    Memory bandwidth [MBytes/s]    |    15.4685 |
|    Memory data volume [GBytes]    |     0.3535 |
|       Operational intensity       |   203.5474 |
+-----------------------------------+------------+

Second run:

➜  bauerc@cn-0252 trixi  sh measure_volume_terms.sh
--------------------------------------------------------------------------------
CPU name:   Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
CPU type:   Intel Skylake SP processor
CPU clock:  2.39 GHz
Warning: The Marker API requires the application to run on the selected CPUs.
Warning: likwid-perfctr pins the application only when using the -C command line option.
Warning: LIKWID assumes that the application does it before the first instrumented code region is started.
Warning: You can use the string in the environment variable LIKWID_THREADS to pin you application to
Warning: to the CPUs specified after the -c command line option.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Region TreeMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |   8.575548 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 38431960000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 19115010000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 13264350000 |
|              PWR_PKG_ENERGY              |   PWR0  |    905.0948 |
|              PWR_DRAM_ENERGY             |   PWR3  |    101.1376 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  1491691000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 22977730000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2136422000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      308744 |
|               CAS_COUNT_WR               | MBOX0C1 |      148097 |
|               CAS_COUNT_RD               | MBOX1C0 |      324417 |
|               CAS_COUNT_WR               | MBOX1C1 |      143411 |
|               CAS_COUNT_RD               | MBOX2C0 |      307776 |
|               CAS_COUNT_WR               | MBOX2C1 |      140641 |
|               CAS_COUNT_RD               | MBOX3C0 |      323688 |
|               CAS_COUNT_WR               | MBOX3C1 |      153991 |
|               CAS_COUNT_RD               | MBOX4C0 |      313574 |
|               CAS_COUNT_WR               | MBOX4C1 |      156665 |
|               CAS_COUNT_RD               | MBOX5C0 |      320488 |
|               CAS_COUNT_WR               | MBOX5C1 |      148861 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |     8.5755 |
|        Runtime unhalted [s]       |     7.9834 |
|            Clock [MHz]            |  3450.4576 |
|                CPI                |     0.4974 |
|             Energy [J]            |   905.0948 |
|             Power [W]             |   105.5437 |
|          Energy DRAM [J]          |   101.1376 |
|           Power DRAM [W]          |    11.7937 |
|            DP [MFLOP/s]           |  4023.8595 |
|          AVX DP [MFLOP/s]         |   996.5180 |
|          Packed [MUOPS/s]         |   423.0765 |
|          Scalar [MUOPS/s]         |  2679.4474 |
|  Memory read bandwidth [MBytes/s] |    14.1701 |
|  Memory read data volume [GBytes] |     0.1215 |
| Memory write bandwidth [MBytes/s] |     6.6546 |
| Memory write data volume [GBytes] |     0.0571 |
|    Memory bandwidth [MBytes/s]    |    20.8246 |
|    Memory data volume [GBytes]    |     0.1786 |
|       Operational intensity       |   193.2260 |
+-----------------------------------+------------+

Region StructuredMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  11.995570 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 59880320000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 28183190000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 19973190000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1267.7930 |
|              PWR_DRAM_ENERGY             |   PWR3  |    142.3111 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  5309685000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 30378350000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2250528000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      336830 |
|               CAS_COUNT_WR               | MBOX0C1 |      128748 |
|               CAS_COUNT_RD               | MBOX1C0 |      341692 |
|               CAS_COUNT_WR               | MBOX1C1 |      126568 |
|               CAS_COUNT_RD               | MBOX2C0 |      337341 |
|               CAS_COUNT_WR               | MBOX2C1 |      126379 |
|               CAS_COUNT_RD               | MBOX3C0 |      363331 |
|               CAS_COUNT_WR               | MBOX3C1 |      139517 |
|               CAS_COUNT_RD               | MBOX4C0 |      348639 |
|               CAS_COUNT_WR               | MBOX4C1 |      139796 |
|               CAS_COUNT_RD               | MBOX5C0 |      344867 |
|               CAS_COUNT_WR               | MBOX5C1 |      130048 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    11.9956 |
|        Runtime unhalted [s]       |    11.7707 |
|            Clock [MHz]            |  3378.5540 |
|                CPI                |     0.4707 |
|             Energy [J]            |  1267.7930 |
|             Power [W]             |   105.6884 |
|          Energy DRAM [J]          |   142.3111 |
|           Power DRAM [W]          |    11.8636 |
|            DP [MFLOP/s]           |  4168.1914 |
|          AVX DP [MFLOP/s]         |   750.4530 |
|          Packed [MUOPS/s]         |   630.2504 |
|          Scalar [MUOPS/s]         |  2532.4641 |
|  Memory read bandwidth [MBytes/s] |    11.0585 |
|  Memory read data volume [GBytes] |     0.1327 |
| Memory write bandwidth [MBytes/s] |     4.2205 |
| Memory write data volume [GBytes] |     0.0506 |
|    Memory bandwidth [MBytes/s]    |    15.2790 |
|    Memory data volume [GBytes]    |     0.1833 |
|       Operational intensity       |   272.8051 |
+-----------------------------------+------------+

Region P4estMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  12.245720 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 63558740000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 30430130000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 22030870000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1293.1050 |
|              PWR_DRAM_ENERGY             |   PWR3  |    145.5273 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  5693176000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 32347250000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2380867000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      310809 |
|               CAS_COUNT_WR               | MBOX0C1 |      123143 |
|               CAS_COUNT_RD               | MBOX1C0 |      313217 |
|               CAS_COUNT_WR               | MBOX1C1 |      124842 |
|               CAS_COUNT_RD               | MBOX2C0 |      311053 |
|               CAS_COUNT_WR               | MBOX2C1 |      122061 |
|               CAS_COUNT_RD               | MBOX3C0 |      317877 |
|               CAS_COUNT_WR               | MBOX3C1 |      136096 |
|               CAS_COUNT_RD               | MBOX4C0 |      316549 |
|               CAS_COUNT_WR               | MBOX4C1 |      131075 |
|               CAS_COUNT_RD               | MBOX5C0 |      315797 |
|               CAS_COUNT_WR               | MBOX5C1 |      134091 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    12.2457 |
|        Runtime unhalted [s]       |    12.7091 |
|            Clock [MHz]            |  3307.1988 |
|                CPI                |     0.4788 |
|             Energy [J]            |  1293.1050 |
|             Power [W]             |   105.5965 |
|          Energy DRAM [J]          |   145.5273 |
|           Power DRAM [W]          |    11.8839 |
|            DP [MFLOP/s]           |  4349.0354 |
|          AVX DP [MFLOP/s]         |   777.6977 |
|          Packed [MUOPS/s]         |   659.3359 |
|          Scalar [MUOPS/s]         |  2641.5147 |
|  Memory read bandwidth [MBytes/s] |     9.8532 |
|  Memory read data volume [GBytes] |     0.1207 |
| Memory write bandwidth [MBytes/s] |     4.0311 |
| Memory write data volume [GBytes] |     0.0494 |
|    Memory bandwidth [MBytes/s]    |    13.8843 |
|    Memory data volume [GBytes]    |     0.1700 |
|       Operational intensity       |   313.2344 |
+-----------------------------------+------------+

Region TreeMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  18.826150 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 85145020000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 45855170000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 32944700000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1985.3640 |
|              PWR_DRAM_ENERGY             |   PWR3  |    224.0630 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  2255276000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 44873750000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2332667000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      439844 |
|               CAS_COUNT_WR               | MBOX0C1 |      198002 |
|               CAS_COUNT_RD               | MBOX1C0 |      445444 |
|               CAS_COUNT_WR               | MBOX1C1 |      198441 |
|               CAS_COUNT_RD               | MBOX2C0 |      440258 |
|               CAS_COUNT_WR               | MBOX2C1 |      196931 |
|               CAS_COUNT_RD               | MBOX3C0 |      447373 |
|               CAS_COUNT_WR               | MBOX3C1 |      211234 |
|               CAS_COUNT_RD               | MBOX4C0 |      440174 |
|               CAS_COUNT_WR               | MBOX4C1 |      204792 |
|               CAS_COUNT_RD               | MBOX5C0 |      446229 |
|               CAS_COUNT_WR               | MBOX5C1 |      207040 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    18.8261 |
|        Runtime unhalted [s]       |    19.1514 |
|            Clock [MHz]            |  3332.6591 |
|                CPI                |     0.5386 |
|             Energy [J]            |  1985.3640 |
|             Power [W]             |   105.4578 |
|          Energy DRAM [J]          |   224.0630 |
|           Power DRAM [W]          |    11.9017 |
|            DP [MFLOP/s]           |  3118.7986 |
|          AVX DP [MFLOP/s]         |   495.6227 |
|          Packed [MUOPS/s]         |   243.7005 |
|          Scalar [MUOPS/s]         |  2383.5861 |
|  Memory read bandwidth [MBytes/s] |     9.0404 |
|  Memory read data volume [GBytes] |     0.1702 |
| Memory write bandwidth [MBytes/s] |     4.1353 |
| Memory write data volume [GBytes] |     0.0779 |
|    Memory bandwidth [MBytes/s]    |    13.1758 |
|    Memory data volume [GBytes]    |     0.2480 |
|       Operational intensity       |   236.7074 |
+-----------------------------------+------------+

Region StructuredMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  22.735760 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+--------------+
|                   Event                  | Counter |  HWThread 0  |
+------------------------------------------+---------+--------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 102049300000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  |  52965500000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  |  37414870000 |
|              PWR_PKG_ENERGY              |   PWR0  |    2397.3460 |
|              PWR_DRAM_ENERGY             |   PWR3  |     269.5991 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |     32988080 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  |  59301530000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |   2231217000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |            0 |
|               CAS_COUNT_RD               | MBOX0C0 |       693576 |
|               CAS_COUNT_WR               | MBOX0C1 |       290449 |
|               CAS_COUNT_RD               | MBOX1C0 |       689512 |
|               CAS_COUNT_WR               | MBOX1C1 |       290386 |
|               CAS_COUNT_RD               | MBOX2C0 |       693012 |
|               CAS_COUNT_WR               | MBOX2C1 |       288456 |
|               CAS_COUNT_RD               | MBOX3C0 |       692354 |
|               CAS_COUNT_WR               | MBOX3C1 |       304688 |
|               CAS_COUNT_RD               | MBOX4C0 |       686184 |
|               CAS_COUNT_WR               | MBOX4C1 |       294446 |
|               CAS_COUNT_RD               | MBOX5C0 |       676023 |
|               CAS_COUNT_WR               | MBOX5C1 |       298491 |
+------------------------------------------+---------+--------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    22.7358 |
|        Runtime unhalted [s]       |    22.1210 |
|            Clock [MHz]            |  3389.5105 |
|                CPI                |     0.5190 |
|             Energy [J]            |  2397.3460 |
|             Power [W]             |   105.4438 |
|          Energy DRAM [J]          |   269.5991 |
|           Power DRAM [W]          |    11.8579 |
|            DP [MFLOP/s]           |  3003.7427 |
|          AVX DP [MFLOP/s]         |   392.5476 |
|          Packed [MUOPS/s]         |    99.5878 |
|          Scalar [MUOPS/s]         |  2608.2933 |
|  Memory read bandwidth [MBytes/s] |    11.6276 |
|  Memory read data volume [GBytes] |     0.2644 |
| Memory write bandwidth [MBytes/s] |     4.9738 |
| Memory write data volume [GBytes] |     0.1131 |
|    Memory bandwidth [MBytes/s]    |    16.6014 |
|    Memory data volume [GBytes]    |     0.3774 |
|       Operational intensity       |   180.9333 |
+-----------------------------------+------------+

Region P4estMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  22.840920 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+--------------+
|                   Event                  | Counter |  HWThread 0  |
+------------------------------------------+---------+--------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 107117600000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  |  55784410000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  |  40130010000 |
|              PWR_PKG_ENERGY              |   PWR0  |    2408.6530 |
|              PWR_DRAM_ENERGY             |   PWR3  |     270.0086 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |     34940060 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  |  62516550000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |   2338743000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |            0 |
|               CAS_COUNT_RD               | MBOX0C0 |       569627 |
|               CAS_COUNT_WR               | MBOX0C1 |       265460 |
|               CAS_COUNT_RD               | MBOX1C0 |       582768 |
|               CAS_COUNT_WR               | MBOX1C1 |       271799 |
|               CAS_COUNT_RD               | MBOX2C0 |       577773 |
|               CAS_COUNT_WR               | MBOX2C1 |       265922 |
|               CAS_COUNT_RD               | MBOX3C0 |       584479 |
|               CAS_COUNT_WR               | MBOX3C1 |       281503 |
|               CAS_COUNT_RD               | MBOX4C0 |       572941 |
|               CAS_COUNT_WR               | MBOX4C1 |       277122 |
|               CAS_COUNT_RD               | MBOX5C0 |       586125 |
|               CAS_COUNT_WR               | MBOX5C1 |       279040 |
+------------------------------------------+---------+--------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    22.8409 |
|        Runtime unhalted [s]       |    23.2983 |
|            Clock [MHz]            |  3328.3709 |
|                CPI                |     0.5208 |
|             Energy [J]            |  2408.6530 |
|             Power [W]             |   105.4534 |
|          Energy DRAM [J]          |   270.0086 |
|           Power DRAM [W]          |    11.8213 |
|            DP [MFLOP/s]           |  3149.6718 |
|          AVX DP [MFLOP/s]         |   409.5707 |
|          Packed [MUOPS/s]         |   103.9224 |
|          Scalar [MUOPS/s]         |  2737.0417 |
|  Memory read bandwidth [MBytes/s] |     9.7333 |
|  Memory read data volume [GBytes] |     0.2223 |
| Memory write bandwidth [MBytes/s] |     4.5976 |
| Memory write data volume [GBytes] |     0.1050 |
|    Memory bandwidth [MBytes/s]    |    14.3309 |
|    Memory data volume [GBytes]    |     0.3273 |
|       Operational intensity       |   219.7813 |
+-----------------------------------+------------+

Third run:

➜  bauerc@cn-0252 trixi  sh measure_volume_terms.sh
--------------------------------------------------------------------------------
CPU name:	Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
CPU type:	Intel Skylake SP processor
CPU clock:	2.39 GHz
Warning: The Marker API requires the application to run on the selected CPUs.
Warning: likwid-perfctr pins the application only when using the -C command line option.
Warning: LIKWID assumes that the application does it before the first instrumented code region is started.
Warning: You can use the string in the environment variable LIKWID_THREADS to pin you application to
Warning: to the CPUs specified after the -c command line option.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Region TreeMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |   8.725595 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 38707980000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 19588980000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 13630940000 |
|              PWR_PKG_ENERGY              |   PWR0  |    921.5705 |
|              PWR_DRAM_ENERGY             |   PWR3  |    103.6795 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  1504480000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 23151120000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2151197000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      215764 |
|               CAS_COUNT_WR               | MBOX0C1 |       82934 |
|               CAS_COUNT_RD               | MBOX1C0 |      218395 |
|               CAS_COUNT_WR               | MBOX1C1 |       83957 |
|               CAS_COUNT_RD               | MBOX2C0 |      209768 |
|               CAS_COUNT_WR               | MBOX2C1 |       82546 |
|               CAS_COUNT_RD               | MBOX3C0 |      218318 |
|               CAS_COUNT_WR               | MBOX3C1 |       86961 |
|               CAS_COUNT_RD               | MBOX4C0 |      209408 |
|               CAS_COUNT_WR               | MBOX4C1 |       82642 |
|               CAS_COUNT_RD               | MBOX5C0 |      213302 |
|               CAS_COUNT_WR               | MBOX5C1 |       84079 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |     8.7256 |
|        Runtime unhalted [s]       |     8.1814 |
|            Clock [MHz]            |  3440.8751 |
|                CPI                |     0.5061 |
|             Energy [J]            |   921.5705 |
|             Power [W]             |   105.6169 |
|          Energy DRAM [J]          |   103.6795 |
|           Power DRAM [W]          |    11.8822 |
|            DP [MFLOP/s]           |  3984.2404 |
|          AVX DP [MFLOP/s]         |   986.1549 |
|          Packed [MUOPS/s]         |   418.9602 |
|          Scalar [MUOPS/s]         |  2653.2426 |
|  Memory read bandwidth [MBytes/s] |     9.4248 |
|  Memory read data volume [GBytes] |     0.0822 |
| Memory write bandwidth [MBytes/s] |     3.6902 |
| Memory write data volume [GBytes] |     0.0322 |
|    Memory bandwidth [MBytes/s]    |    13.1151 |
|    Memory data volume [GBytes]    |     0.1144 |
|       Operational intensity       |   303.7912 |
+-----------------------------------+------------+

Region StructuredMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  11.993640 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 61265840000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 28834690000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 20619160000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1268.1460 |
|              PWR_DRAM_ENERGY             |   PWR3  |    142.0985 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  5463958000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 31168770000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2302836000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      266908 |
|               CAS_COUNT_WR               | MBOX0C1 |      110070 |
|               CAS_COUNT_RD               | MBOX1C0 |      272134 |
|               CAS_COUNT_WR               | MBOX1C1 |      111730 |
|               CAS_COUNT_RD               | MBOX2C0 |      279504 |
|               CAS_COUNT_WR               | MBOX2C1 |      110206 |
|               CAS_COUNT_RD               | MBOX3C0 |      271637 |
|               CAS_COUNT_WR               | MBOX3C1 |      120032 |
|               CAS_COUNT_RD               | MBOX4C0 |      269719 |
|               CAS_COUNT_WR               | MBOX4C1 |      114938 |
|               CAS_COUNT_RD               | MBOX5C0 |      278317 |
|               CAS_COUNT_WR               | MBOX5C1 |      116310 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    11.9936 |
|        Runtime unhalted [s]       |    12.0429 |
|            Clock [MHz]            |  3348.3220 |
|                CPI                |     0.4706 |
|             Energy [J]            |  1268.1460 |
|             Power [W]             |   105.7349 |
|          Energy DRAM [J]          |   142.0985 |
|           Power DRAM [W]          |    11.8478 |
|            DP [MFLOP/s]           |  4277.9365 |
|          AVX DP [MFLOP/s]         |   768.0191 |
|          Packed [MUOPS/s]         |   647.5760 |
|          Scalar [MUOPS/s]         |  2598.7749 |
|  Memory read bandwidth [MBytes/s] |     8.7418 |
|  Memory read data volume [GBytes] |     0.1048 |
| Memory write bandwidth [MBytes/s] |     3.6461 |
| Memory write data volume [GBytes] |     0.0437 |
|    Memory bandwidth [MBytes/s]    |    12.3879 |
|    Memory data volume [GBytes]    |     0.1486 |
|       Operational intensity       |   345.3311 |
+-----------------------------------+------------+

Region P4estMesh-flux_shima_etal, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  12.353050 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 57952220000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 27994570000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 19553540000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1304.4400 |
|              PWR_DRAM_ENERGY             |   PWR3  |    147.0310 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  5072629000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 29164250000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2170372000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      303436 |
|               CAS_COUNT_WR               | MBOX0C1 |      123792 |
|               CAS_COUNT_RD               | MBOX1C0 |      308739 |
|               CAS_COUNT_WR               | MBOX1C1 |      123849 |
|               CAS_COUNT_RD               | MBOX2C0 |      310356 |
|               CAS_COUNT_WR               | MBOX2C1 |      124534 |
|               CAS_COUNT_RD               | MBOX3C0 |      311919 |
|               CAS_COUNT_WR               | MBOX3C1 |      136154 |
|               CAS_COUNT_RD               | MBOX4C0 |      304179 |
|               CAS_COUNT_WR               | MBOX4C1 |      129219 |
|               CAS_COUNT_RD               | MBOX5C0 |      307133 |
|               CAS_COUNT_WR               | MBOX5C1 |      129768 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    12.3530 |
|        Runtime unhalted [s]       |    11.6921 |
|            Clock [MHz]            |  3427.9249 |
|                CPI                |     0.4831 |
|             Energy [J]            |  1304.4400 |
|             Power [W]             |   105.5966 |
|          Energy DRAM [J]          |   147.0310 |
|           Power DRAM [W]          |    11.9024 |
|            DP [MFLOP/s]           |  3884.9512 |
|          AVX DP [MFLOP/s]         |   702.7809 |
|          Packed [MUOPS/s]         |   586.3330 |
|          Scalar [MUOPS/s]         |  2360.8947 |
|  Memory read bandwidth [MBytes/s] |     9.5627 |
|  Memory read data volume [GBytes] |     0.1181 |
| Memory write bandwidth [MBytes/s] |     3.9754 |
| Memory write data volume [GBytes] |     0.0491 |
|    Memory bandwidth [MBytes/s]    |    13.5381 |
|    Memory data volume [GBytes]    |     0.1672 |
|       Operational intensity       |   286.9640 |
+-----------------------------------+------------+

Region TreeMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  18.434140 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+-------------+
|                   Event                  | Counter |  HWThread 0 |
+------------------------------------------+---------+-------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 84665060000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  | 44648690000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  | 32012370000 |
|              PWR_PKG_ENERGY              |   PWR0  |   1946.1120 |
|              PWR_DRAM_ENERGY             |   PWR3  |    218.6995 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |  2239664000 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  | 44588070000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |  2319154000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |           0 |
|               CAS_COUNT_RD               | MBOX0C0 |      377215 |
|               CAS_COUNT_WR               | MBOX0C1 |      149731 |
|               CAS_COUNT_RD               | MBOX1C0 |      378855 |
|               CAS_COUNT_WR               | MBOX1C1 |      150295 |
|               CAS_COUNT_RD               | MBOX2C0 |      373844 |
|               CAS_COUNT_WR               | MBOX2C1 |      151703 |
|               CAS_COUNT_RD               | MBOX3C0 |      398949 |
|               CAS_COUNT_WR               | MBOX3C1 |      167538 |
|               CAS_COUNT_RD               | MBOX4C0 |      376333 |
|               CAS_COUNT_WR               | MBOX4C1 |      159173 |
|               CAS_COUNT_RD               | MBOX5C0 |      382310 |
|               CAS_COUNT_WR               | MBOX5C1 |      159657 |
+------------------------------------------+---------+-------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    18.4341 |
|        Runtime unhalted [s]       |    18.6477 |
|            Clock [MHz]            |  3339.4411 |
|                CPI                |     0.5274 |
|             Energy [J]            |  1946.1120 |
|             Power [W]             |   105.5711 |
|          Energy DRAM [J]          |   218.6995 |
|           Power DRAM [W]          |    11.8638 |
|            DP [MFLOP/s]           |  3164.9979 |
|          AVX DP [MFLOP/s]         |   503.2302 |
|          Packed [MUOPS/s]         |   247.3030 |
|          Scalar [MUOPS/s]         |  2418.7768 |
|  Memory read bandwidth [MBytes/s] |     7.9418 |
|  Memory read data volume [GBytes] |     0.1464 |
| Memory write bandwidth [MBytes/s] |     3.2569 |
| Memory write data volume [GBytes] |     0.0600 |
|    Memory bandwidth [MBytes/s]    |    11.1987 |
|    Memory data volume [GBytes]    |     0.2064 |
|       Operational intensity       |   282.6216 |
+-----------------------------------+------------+

Region StructuredMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  22.634730 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+--------------+
|                   Event                  | Counter |  HWThread 0  |
+------------------------------------------+---------+--------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 101752100000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  |  52588420000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  |  37103250000 |
|              PWR_PKG_ENERGY              |   PWR0  |    2387.7970 |
|              PWR_DRAM_ENERGY             |   PWR3  |     267.5572 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |     32847910 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  |  59114870000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |   2224710000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |            0 |
|               CAS_COUNT_RD               | MBOX0C0 |       511294 |
|               CAS_COUNT_WR               | MBOX0C1 |       203548 |
|               CAS_COUNT_RD               | MBOX1C0 |       513101 |
|               CAS_COUNT_WR               | MBOX1C1 |       201421 |
|               CAS_COUNT_RD               | MBOX2C0 |       528971 |
|               CAS_COUNT_WR               | MBOX2C1 |       207988 |
|               CAS_COUNT_RD               | MBOX3C0 |       531193 |
|               CAS_COUNT_WR               | MBOX3C1 |       222918 |
|               CAS_COUNT_RD               | MBOX4C0 |       510249 |
|               CAS_COUNT_WR               | MBOX4C1 |       211596 |
|               CAS_COUNT_RD               | MBOX5C0 |       516611 |
|               CAS_COUNT_WR               | MBOX5C1 |       215289 |
+------------------------------------------+---------+--------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    22.6347 |
|        Runtime unhalted [s]       |    21.9638 |
|            Clock [MHz]            |  3393.6032 |
|                CPI                |     0.5168 |
|             Energy [J]            |  2387.7970 |
|             Power [W]             |   105.4926 |
|          Energy DRAM [J]          |   267.5572 |
|           Power DRAM [W]          |    11.8206 |
|            DP [MFLOP/s]           |  3007.7410 |
|          AVX DP [MFLOP/s]         |   393.1498 |
|          Packed [MUOPS/s]         |    99.7387 |
|          Scalar [MUOPS/s]         |  2611.6888 |
|  Memory read bandwidth [MBytes/s] |     8.7976 |
|  Memory read data volume [GBytes] |     0.1991 |
| Memory write bandwidth [MBytes/s] |     3.5705 |
| Memory write data volume [GBytes] |     0.0808 |
|    Memory bandwidth [MBytes/s]    |    12.3680 |
|    Memory data volume [GBytes]    |     0.2799 |
|       Operational intensity       |   243.1864 |
+-----------------------------------+------------+

Region P4estMesh-flux_ranocha, Group 1: MEM_DP
+-------------------+------------+
|    Region Info    | HWThread 0 |
+-------------------+------------+
| RDTSC Runtime [s] |  22.935040 |
|     call count    |          1 |
+-------------------+------------+

+------------------------------------------+---------+--------------+
|                   Event                  | Counter |  HWThread 0  |
+------------------------------------------+---------+--------------+
|             INSTR_RETIRED_ANY            |  FIXC0  | 104604700000 |
|           CPU_CLK_UNHALTED_CORE          |  FIXC1  |  54691290000 |
|           CPU_CLK_UNHALTED_REF           |  FIXC2  |  38983070000 |
|              PWR_PKG_ENERGY              |   PWR0  |    2419.2990 |
|              PWR_DRAM_ENERGY             |   PWR3  |     271.8944 |
| FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE |   PMC0  |     33946040 |
|    FP_ARITH_INST_RETIRED_SCALAR_DOUBLE   |   PMC1  |  60862940000 |
| FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE |   PMC2  |   2283535000 |
| FP_ARITH_INST_RETIRED_512B_PACKED_DOUBLE |   PMC3  |            0 |
|               CAS_COUNT_RD               | MBOX0C0 |       565572 |
|               CAS_COUNT_WR               | MBOX0C1 |       241015 |
|               CAS_COUNT_RD               | MBOX1C0 |       577185 |
|               CAS_COUNT_WR               | MBOX1C1 |       242388 |
|               CAS_COUNT_RD               | MBOX2C0 |       572913 |
|               CAS_COUNT_WR               | MBOX2C1 |       245540 |
|               CAS_COUNT_RD               | MBOX3C0 |       579170 |
|               CAS_COUNT_WR               | MBOX3C1 |       265873 |
|               CAS_COUNT_RD               | MBOX4C0 |       569190 |
|               CAS_COUNT_WR               | MBOX4C1 |       254182 |
|               CAS_COUNT_RD               | MBOX5C0 |       569354 |
|               CAS_COUNT_WR               | MBOX5C1 |       254550 |
+------------------------------------------+---------+--------------+

+-----------------------------------+------------+
|               Metric              | HWThread 0 |
+-----------------------------------+------------+
|        Runtime (RDTSC) [s]        |    22.9350 |
|        Runtime unhalted [s]       |    22.8421 |
|            Clock [MHz]            |  3359.1162 |
|                CPI                |     0.5228 |
|             Energy [J]            |  2419.2990 |
|             Power [W]             |   105.4848 |
|          Energy DRAM [J]          |   271.8944 |
|           Power DRAM [W]          |    11.8550 |
|            DP [MFLOP/s]           |  3054.9313 |
|          AVX DP [MFLOP/s]         |   398.2614 |
|          Packed [MUOPS/s]         |   101.0454 |
|          Scalar [MUOPS/s]         |  2653.7098 |
|  Memory read bandwidth [MBytes/s] |     9.5808 |
|  Memory read data volume [GBytes] |     0.2197 |
| Memory write bandwidth [MBytes/s] |     4.1956 |
| Memory write data volume [GBytes] |     0.0962 |
|    Memory bandwidth [MBytes/s]    |    13.7765 |
|    Memory data volume [GBytes]    |     0.3160 |
|       Operational intensity       |   221.7501 |
+-----------------------------------+------------+

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment