
// ONNX import (produced by torch_mlir.tools.import_onnx), viewed with:
//   torch-mlir-opt input.mlir
module {
  func.func @test_transpose_default(%arg0: !torch.vtensor<[2,3,4],f32>) -> !torch.vtensor<[4,3,2],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 13 : si64} {
    %0 = torch.operator "onnx.Transpose"(%arg0) : (!torch.vtensor<[2,3,4],f32>) -> !torch.vtensor<[4,3,2],f32>
    return %0 : !torch.vtensor<[4,3,2],f32>
  }
}
// -----
rsuderman / test.py (created January 31, 2024): Useful ONNX / Torch test script
# test.py (excerpt): imports for exporting torch programs and handling ONNX models.
from pathlib import Path
import io

import onnx
import torch
from torch._export import capture_pre_autograd_graph
from torch.export import export, ExportedProgram
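The rest of test.py is not shown above. As a minimal sketch of how these imports fit together (the Transpose module, shapes, and opset below are illustrative assumptions, not the gist's code), they are enough to round-trip a toy module through ONNX in memory:

# Sketch: export a toy module to ONNX in memory, then reload it with the
# onnx package so it can be handed to the torch-mlir ONNX importer.
class Transpose(torch.nn.Module):
    def forward(self, x):
        return x.permute(2, 1, 0)

example = (torch.randn(2, 3, 4),)
buffer = io.BytesIO()
torch.onnx.export(Transpose(), example, buffer, opset_version=13)

onnx_model = onnx.load_model_from_string(buffer.getvalue())
onnx.checker.check_model(onnx_model)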
// -----
func.func @prefill_bs4$async_dispatch_1_generic_4xDx3200_i64xf32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} {
  %c32_i64 = arith.constant 32 : i64
  %c0 = arith.constant 0 : index
  %c3200 = arith.constant 3200 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c2 = arith.constant 2 : index
  %c64 = arith.constant 64 : index
  %cst = arith.constant 0.000000e+00 : f16
  %c0_i64 = arith.constant 0 : i64
// -----
func.func @decode_bs4$async_dispatch_2_elementwise_32000x3200_f16xf32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} {
  %c1664 = arith.constant 1664 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c1 = arith.constant 1 : index
  %cst = arith.constant 0.000000e+00 : f16
  %c0_i64 = arith.constant 0 : i64
  %cst_0 = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
  %cst_1 = arith.constant dense<3200> : vector<4x1x1xindex>
  %cst_2 = arith.constant dense<0.000000e+00> : vector<1x1x4xf32>
1. Build torch-mlir with Python bindings (set up PYTHONPATH):
https://github.com/llvm/torch-mlir/blob/main/docs/development.md
2. Import the ONNX file to MLIR:
python3 -m torch_mlir.tools.import_onnx model.onnx -o /tmp/onnx.mlir
3. Use `torch-mlir-opt` to convert the ONNX operators to torch operators (see the sketch below).
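Assuming the conversion pass is registered as `convert-torch-onnx-to-torch` (check `torch-mlir-opt --help` for the exact flag name), step 3 looks like:

torch-mlir-opt --convert-torch-onnx-to-torch /tmp/onnx.mlir -o /tmp/torch.mlir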
// Truncated excerpt (the full file is much longer):
#map = affine_map<(d0, d1, d2) -> (d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module @module {
  util.global private @__auto.constant_2048_50_torch.complex64 = dense_resource<__auto.constant_2048_50_torch.complex64> : tensor<2048x50xcomplex<f32>>
  util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<32000x3200xf16>
  util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<3200xf32>
  util.global private @__auto.blk.0.attn_q.weight = #stream.parameter.named<"model"::"blk.0.attn_q.weight"> : tensor<3200x3200xf16>
  util.global private @__auto.blk.0.attn_k.weight = #stream.parameter.named<"model"::"blk.0.attn_k.weight"> : tensor<3200x3200xf16>
  util.global private @__auto.blk.0.attn_v.weight = #stream.parameter.named<"model"::"blk.0.attn_v.weight"> : tensor<3200x3200xf16>
  util.global private @__auto.blk.0.attn_output.weight = #stream.parameter.named<"model"::"blk.0.attn_ou
// -----
#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> ()>
func.func private @broadcast_scale_widen(
    %value : tensor<4x64x96xf16>, %scale : tensor<f32>) -> tensor<4x64x96xf32> {
  %empty_f32 = tensor.empty() : tensor<4x64x96xf32>
  %scaled = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]}
// -----
#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> ()>
func.func private @broadcast_scale_widen(
    %value : tensor<4x64x96xf8E4M3FNUZ>, %scale : tensor<f32>) -> tensor<4x64x96xf32> {
  %empty_f32 = tensor.empty() : tensor<4x64x96xf32>
  %scaled = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]}
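Both variants cut off before the generic body. Judging by the name and the signature, the truncated linalg.generic presumably extends each element to f32 and applies the scalar scale; a rough torch analogue, as an illustration rather than something generated from the MLIR:

# Assumed semantics of @broadcast_scale_widen: widen to f32, multiply by
# a 0-d scale tensor that broadcasts across every element.
import torch

def broadcast_scale_widen(value: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    return value.to(torch.float32) * scale

out = broadcast_scale_widen(torch.randn(4, 64, 96, dtype=torch.float16),
                            torch.tensor(2.0))
assert out.shape == (4, 64, 96) and out.dtype == torch.float32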
// -----
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> ()>
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map4 = affine_map<(d0, d1, d2) -> (d0, d1, 0)>
module @module {
  ml_program.global private mutable @global_seed(dense<0> : tensor<i64>) : tensor<i64>
  func.func @main(%arg0: tensor<4x64x32xf8E4M3FNUZ>, %arg1: tensor<4x64x32xf8E4M3FNUZ>, %arg2: tensor<4x64x32xf8E4M3FNUZ>, %arg3: tensor<f32>, %arg4: tensor<f32>, %arg5: tensor<f32>, %arg6: tensor<f32>) -> tensor<4x64x32xf8E4M3FNUZ> {
    %cst = arith.constant 0.000000e+00 : f32
    %c0_i64 = arith.constant 0 : i64
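The signature (three f8 tensors plus four f32 scales, with one transposed indexing map) suggests a scaled-attention test. A speculative torch analogue, purely for orientation; the scale assignment and the attention recipe are guesses, since the function body is truncated:

# Speculative reading of @main: dequantize q/k/v with per-tensor scales,
# attend in f32, requantize the output. Not derived from the MLIR body.
import torch

def f8_attention(q, k, v, q_scale, k_scale, v_scale, out_scale):
    q32 = q.to(torch.float32) * q_scale
    k32 = k.to(torch.float32) * k_scale
    v32 = v.to(torch.float32) * v_scale
    out = torch.nn.functional.scaled_dot_product_attention(q32, k32, v32)
    return (out / out_scale).to(torch.float8_e4m3fnuz)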
rsuderman / gist:ca2dbf8d998e34c4880a51fb94fceb85 (last active June 10, 2024): Matmul per-channel quant
# Excerpt: per-channel scales for the two matmul operands (one scale per row).
import matplotlib.pyplot as plt
import torch

A_SHAPE = (8, 128)
B_SHAPE = (16, 128)

torch.manual_seed(12345)

A_QUANT = torch.rand((A_SHAPE[0], 1), dtype=torch.float)
B_QUANT = torch.rand((B_SHAPE[0], 1), dtype=torch.float)
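The gist is truncated here. A minimal sketch of how a per-channel quantized matmul could continue from these definitions (the quantization recipe below is an assumption, not the gist's code):

# Assumed continuation: treat A_QUANT / B_QUANT as per-row scales, quantize
# to int8, matmul with int32 accumulation, then dequantize and compare
# against the f32 reference.
A = torch.rand(A_SHAPE, dtype=torch.float)
B = torch.rand(B_SHAPE, dtype=torch.float)

A_int = torch.clamp((A / A_QUANT).round(), -128, 127).to(torch.int8)
B_int = torch.clamp((B / B_QUANT).round(), -128, 127).to(torch.int8)

# Rescale: result[i, j] was scaled by A_QUANT[i] * B_QUANT[j].
acc = A_int.to(torch.int32) @ B_int.to(torch.int32).T
dequant = acc.to(torch.float) * (A_QUANT @ B_QUANT.T)

reference = A @ B.T
plt.imshow((dequant - reference).abs())
plt.colorbar()
plt.show()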