Created
July 9, 2024 00:04
-
-
Save yzhang93/456640440608e48550308bf87245523c to your computer and use it in GitHub Desktop.
deeplab_dispatches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<576xi8> -> tensor<576xf32>: sign-extend, convert, scale by 1/64.
util.func public @jit_eval_0(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_0(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
  %cst = arith.constant 1.562500e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
  %1 = tensor.empty() : tensor<576xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<576xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<192xi8> -> tensor<192xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_3(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_3(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
  %1 = tensor.empty() : tensor<192xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<192xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<96x3x3xi8> -> tensor<96x3x3xf32> elementwise (identity maps), scale by 1/32.
util.func public @jit_eval_10(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_10(%input0: tensor<96x3x3xi8>) -> (%output0: tensor<96x3x3xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x3x3xi8>
  %1 = tensor.empty() : tensor<96x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<96x3x3xi8>) outs(%1 : tensor<96x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<96x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<96x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<144xi8> -> tensor<144xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_2(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_2(%input0: tensor<144xi8>) -> (%output0: tensor<144xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144xi8>
  %1 = tensor.empty() : tensor<144xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<144xi8>) outs(%1 : tensor<144xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<144xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<144xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<960xi8> -> tensor<960xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_1(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_1(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
  %1 = tensor.empty() : tensor<960xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<960xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<384xi8> -> tensor<384xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_4(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_4(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
  %1 = tensor.empty() : tensor<384xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<384xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<32xi8> -> tensor<32xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_6(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_6(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
  %1 = tensor.empty() : tensor<32xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<32xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize + relayout: tensor<32x3x3x3xi8> -> tensor<3x3x3x32xf32>; the input map
// (d0,d1,d2,d3) -> (d3,d2,d0,d1) transposes while scaling by 1/32.
util.func public @jit_eval_7(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_7(%input0: tensor<32x3x3x3xi8>) -> (%output0: tensor<3x3x3x32xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x3x3x3xi8>
  %1 = tensor.empty() : tensor<3x3x3x32xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d2, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x3x3x3xi8>) outs(%1 : tensor<3x3x3x32xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<3x3x3x32xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<3x3x3x32xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<96xi8> -> tensor<96xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
  %1 = tensor.empty() : tensor<96xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<96xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<256xi8> -> tensor<256xf32>: sign-extend, convert, scale by 1/1024.
util.func public @jit_eval_5(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_5(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
  %cst = arith.constant 9.765625E-4 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
  %1 = tensor.empty() : tensor<256xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<256xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<16xi8> -> tensor<16xf32>: sign-extend, convert, scale by 1/4.
util.func public @jit_eval_9(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_9(%input0: tensor<16xi8>) -> (%output0: tensor<16xf32>)"}} {
  %cst = arith.constant 2.500000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16xi8>
  %1 = tensor.empty() : tensor<16xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<16xi8>) outs(%1 : tensor<16xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<16xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<16xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<24xi8> -> tensor<24xf32>: sign-extend, convert, scale by 1/2.
util.func public @jit_eval_11(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_11(%input0: tensor<24xi8>) -> (%output0: tensor<24xf32>)"}} {
  %cst = arith.constant 5.000000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24xi8>
  %1 = tensor.empty() : tensor<24xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<24xi8>) outs(%1 : tensor<24xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<24xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<24xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<32x3x3xi8> -> tensor<32x3x3xf32> elementwise (identity maps), scale by 1/2.
util.func public @jit_eval_8(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_8(%input0: tensor<32x3x3xi8>) -> (%output0: tensor<32x3x3xf32>)"}} {
  %cst = arith.constant 5.000000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x3x3xi8>
  %1 = tensor.empty() : tensor<32x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<32x3x3xi8>) outs(%1 : tensor<32x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<32x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<32x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<384x3x3xi8> -> tensor<384x3x3xf32> elementwise (identity maps), scale by 1/16.
util.func public @jit_eval_12(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_12(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
  %1 = tensor.empty() : tensor<384x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<384x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<384xi8> -> tensor<384xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_13(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_13(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
  %1 = tensor.empty() : tensor<384xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<384xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<576xi8> -> tensor<576xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_18(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_18(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
  %1 = tensor.empty() : tensor<576xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<576xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<576xi8> -> tensor<576xf32>: sign-extend, convert, scale by 1/32.
util.func public @jit_eval_21(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_21(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
  %1 = tensor.empty() : tensor<576xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<576xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<96xi8> -> tensor<96xf32>: sign-extend, convert, scale by 1/8.
util.func public @jit_eval_14(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_14(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
  %cst = arith.constant 1.250000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
  %1 = tensor.empty() : tensor<96xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<96xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<960x3x3xi8> -> tensor<960x3x3xf32> elementwise (identity maps), scale by 1/16.
util.func public @jit_eval_23(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_23(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
  %1 = tensor.empty() : tensor<960x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<960x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<576x3x3xi8> -> tensor<576x3x3xf32> elementwise (identity maps), scale by 1/16.
util.func public @jit_eval_15(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_15(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
  %1 = tensor.empty() : tensor<576x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<576x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<96xi8> -> tensor<96xf32>: sign-extend, convert, scale by 1/8.
util.func public @jit_eval_19(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_19(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
  %cst = arith.constant 1.250000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
  %1 = tensor.empty() : tensor<96xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<96xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<960xi8> -> tensor<960xf32>: sign-extend, convert, scale by 1/64.
util.func public @jit_eval_26(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_26(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
  %cst = arith.constant 1.562500e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
  %1 = tensor.empty() : tensor<960xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<960xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<160xi8> -> tensor<160xf32>: sign-extend, convert, scale by 1/8.
util.func public @jit_eval_27(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_27(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
  %cst = arith.constant 1.250000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
  %1 = tensor.empty() : tensor<160xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<160xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<576x3x3xi8> -> tensor<576x3x3xf32> elementwise (identity maps), scale by 1/16.
util.func public @jit_eval_17(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_17(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
  %1 = tensor.empty() : tensor<576x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<576x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<96xi8> -> tensor<96xf32>: sign-extend, convert, scale by 1/16.
util.func public @jit_eval_16(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_16(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
  %1 = tensor.empty() : tensor<96xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<96xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<960x3x3xi8> -> tensor<960x3x3xf32> elementwise (identity maps), scale by 1/8.
util.func public @jit_eval_28(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_28(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
  %cst = arith.constant 1.250000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
  %1 = tensor.empty() : tensor<960x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<960x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<160xi8> -> tensor<160xf32>: sign-extend, convert, scale by 1/8.
util.func public @jit_eval_22(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_22(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
  %cst = arith.constant 1.250000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
  %1 = tensor.empty() : tensor<160xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<160xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<576x3x3xi8> -> tensor<576x3x3xf32> elementwise (identity maps), scale by 1/32.
util.func public @jit_eval_20(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_20(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
  %cst = arith.constant 3.125000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
  %1 = tensor.empty() : tensor<576x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<576x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<960x3x3xi8> -> tensor<960x3x3xf32> elementwise (identity maps), scale by 1/16.
util.func public @jit_eval_25(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_25(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
  %1 = tensor.empty() : tensor<960x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<960x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<160xi8> -> tensor<160xf32>: sign-extend, convert, scale by 1/16.
util.func public @jit_eval_24(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_24(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
  %cst = arith.constant 6.250000e-02 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
  %1 = tensor.empty() : tensor<160xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<160xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize tensor<144x3x3xi8> -> tensor<144x3x3xf32> elementwise (identity maps), scale by 1/8.
util.func public @jit_eval_31(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_31(%input0: tensor<144x3x3xi8>) -> (%output0: tensor<144x3x3xf32>)"}} {
  %cst = arith.constant 1.250000e-01 : f32
  %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x3x3xi8>
  %1 = tensor.empty() : tensor<144x3x3xf32>
  %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<144x3x3xi8>) outs(%1 : tensor<144x3x3xf32>) {
  ^bb0(%in: i8, %out: f32):
    %4 = arith.extsi %in : i8 to i32
    %5 = arith.sitofp %4 : i32 to f32
    %6 = arith.mulf %5, %cst : f32
    linalg.yield %6 : f32
  } -> tensor<144x3x3xf32>
  %3 = hal.tensor.export %2 "output0" : tensor<144x3x3xf32> -> !hal.buffer_view
  util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_29(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_29(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8> | |
%1 = tensor.empty() : tensor<960xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<960xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_33(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_33(%input0: tensor<144x3x3xi8>) -> (%output0: tensor<144x3x3xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x3x3xi8> | |
%1 = tensor.empty() : tensor<144x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<144x3x3xi8>) outs(%1 : tensor<144x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<144x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<144x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_34(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_34(%input0: tensor<144xi8>) -> (%output0: tensor<144xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144xi8> | |
%1 = tensor.empty() : tensor<144xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<144xi8>) outs(%1 : tensor<144xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<144xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<144xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_39(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_39(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8> | |
%1 = tensor.empty() : tensor<192xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<192xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_41(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_41(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8> | |
%1 = tensor.empty() : tensor<192x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<192x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_40(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_40(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8> | |
%1 = tensor.empty() : tensor<32xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<32xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_30(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_30(%input0: tensor<320xi8>) -> (%output0: tensor<320xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<320xi8> | |
%1 = tensor.empty() : tensor<320xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<320xi8>) outs(%1 : tensor<320xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<320xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<320xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_32(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_32(%input0: tensor<24xi8>) -> (%output0: tensor<24xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24xi8> | |
%1 = tensor.empty() : tensor<24xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<24xi8>) outs(%1 : tensor<24xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<24xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<24xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_35(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_35(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} { | |
%cst = arith.constant 2.500000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8> | |
%1 = tensor.empty() : tensor<32xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<32xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_36(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_36(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8> | |
%1 = tensor.empty() : tensor<192x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<192x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_38(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_38(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} { | |
%cst = arith.constant 6.250000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8> | |
%1 = tensor.empty() : tensor<192x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<192x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_37(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_37(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8> | |
%1 = tensor.empty() : tensor<32xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<32xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_45(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_45(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} { | |
%cst = arith.constant 6.250000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8> | |
%1 = tensor.empty() : tensor<64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_46(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_46(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} { | |
%cst = arith.constant 6.250000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8> | |
%1 = tensor.empty() : tensor<384x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<384x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_43(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_43(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} { | |
%cst = arith.constant 2.500000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8> | |
%1 = tensor.empty() : tensor<64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_42(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_42(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8> | |
%1 = tensor.empty() : tensor<192xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<192xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_47(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_47(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8> | |
%1 = tensor.empty() : tensor<384xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<384xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_48(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_48(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} { | |
%cst = arith.constant 6.250000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8> | |
%1 = tensor.empty() : tensor<64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_44(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_44(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8> | |
%1 = tensor.empty() : tensor<384x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<384x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_49(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_49(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} { | |
%cst = arith.constant 6.250000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8> | |
%1 = tensor.empty() : tensor<384x3x3xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<384x3x3xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_51(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_51(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} { | |
%cst = arith.constant 1.250000e-01 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8> | |
%1 = tensor.empty() : tensor<64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_52(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_52(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8> | |
%1 = tensor.empty() : tensor<256xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<256xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_58(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_58(%input0: tensor<144x1x1x24xi8>) -> (%output0: tensor<1x1x24x144xf32>)"}} { | |
%cst = arith.constant 3.906250e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x1x1x24xi8> | |
%1 = tensor.empty() : tensor<1x1x24x144xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<144x1x1x24xi8>) outs(%1 : tensor<1x1x24x144xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x24x144xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x24x144xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_50(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_50(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8> | |
%1 = tensor.empty() : tensor<384xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<384xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_56(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_56(%input0: tensor<96x1x1x16xi8>) -> (%output0: tensor<1x1x16x96xf32>)"}} { | |
%cst = arith.constant 3.906250e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x16xi8> | |
%1 = tensor.empty() : tensor<1x1x16x96xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x16xi8>) outs(%1 : tensor<1x1x16x96xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x16x96xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x16x96xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_60(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_60(%input0: tensor<144x1x1x24xi8>) -> (%output0: tensor<1x1x24x144xf32>)"}} { | |
%cst = arith.constant 3.906250e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x1x1x24xi8> | |
%1 = tensor.empty() : tensor<1x1x24x144xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<144x1x1x24xi8>) outs(%1 : tensor<1x1x24x144xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x24x144xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x24x144xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_62(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_62(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8> | |
%1 = tensor.empty() : tensor<1x1x32x192xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x32x192xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_55(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_55(%input0: tensor<16x1x1x32xi8>) -> (%output0: tensor<1x1x32x16xf32>)"}} { | |
%cst = arith.constant 6.250000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16x1x1x32xi8> | |
%1 = tensor.empty() : tensor<1x1x32x16xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<16x1x1x32xi8>) outs(%1 : tensor<1x1x32x16xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x32x16xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x16xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_61(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_61(%input0: tensor<32x1x1x144xi8>) -> (%output0: tensor<1x1x144x32xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x144xi8> | |
%1 = tensor.empty() : tensor<1x1x144x32xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x144xi8>) outs(%1 : tensor<1x1x144x32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x144x32xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x144x32xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_54(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_54(%input0: tensor<21xi8>) -> (%output0: tensor<21xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<21xi8> | |
%1 = tensor.empty() : tensor<21xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<21xi8>) outs(%1 : tensor<21xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<21xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<21xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @jit_eval_53(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_53(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8> | |
%1 = tensor.empty() : tensor<256xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<256xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 9.765625E-4 = 2^-10)
// while the indexing maps move the leading dim last: 192x1x1x32 -> 1x1x32x192
// (presumably an OHWI -> HWIO filter-layout change — confirm against the consumer).
util.func public @jit_eval_64(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_64(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} { | |
%cst = arith.constant 9.765625E-4 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8> | |
%1 = tensor.empty() : tensor<1x1x32x192xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x32x192xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 1.562500e-02 = 2^-6)
// with leading dim moved last: 32x1x1x192 -> 1x1x192x32 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_63(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_63(%input0: tensor<32x1x1x192xi8>) -> (%output0: tensor<1x1x192x32xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x192xi8> | |
%1 = tensor.empty() : tensor<1x1x192x32xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x192xi8>) outs(%1 : tensor<1x1x192x32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x192x32xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x32xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 3.125000e-02 = 2^-5)
// with leading dim moved last: 24x1x1x144 -> 1x1x144x24 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_59(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_59(%input0: tensor<24x1x1x144xi8>) -> (%output0: tensor<1x1x144x24xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24x1x1x144xi8> | |
%1 = tensor.empty() : tensor<1x1x144x24xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<24x1x1x144xi8>) outs(%1 : tensor<1x1x144x24xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x144x24xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x144x24xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 3.125000e-02 = 2^-5)
// with leading dim moved last: 24x1x1x96 -> 1x1x96x24 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_57(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_57(%input0: tensor<24x1x1x96xi8>) -> (%output0: tensor<1x1x96x24xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24x1x1x96xi8> | |
%1 = tensor.empty() : tensor<1x1x96x24xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<24x1x1x96xi8>) outs(%1 : tensor<1x1x96x24xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x96x24xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x24xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 1.562500e-02 = 2^-6)
// with leading dim moved last: 64x1x1x192 -> 1x1x192x64 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_67(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_67(%input0: tensor<64x1x1x192xi8>) -> (%output0: tensor<1x1x192x64xf32>)"}} { | |
%cst = arith.constant 1.562500e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x192xi8> | |
%1 = tensor.empty() : tensor<1x1x192x64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x192xi8>) outs(%1 : tensor<1x1x192x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x192x64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 3.125000e-02 = 2^-5)
// with leading dim moved last: 32x1x1x192 -> 1x1x192x32 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_65(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_65(%input0: tensor<32x1x1x192xi8>) -> (%output0: tensor<1x1x192x32xf32>)"}} { | |
%cst = arith.constant 3.125000e-02 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x192xi8> | |
%1 = tensor.empty() : tensor<1x1x192x32xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x192xi8>) outs(%1 : tensor<1x1x192x32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x192x32xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x32xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 9.765625E-4 = 2^-10)
// with leading dim moved last: 384x1x1x64 -> 1x1x64x384 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_68(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_68(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} { | |
%cst = arith.constant 9.765625E-4 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8> | |
%1 = tensor.empty() : tensor<1x1x64x384xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x64x384xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 64x1x1x384 -> 1x1x384x64 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_71(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_71(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8> | |
%1 = tensor.empty() : tensor<1x1x384x64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x384x64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 9.765625E-4 = 2^-10)
// with leading dim moved last: 384x1x1x64 -> 1x1x64x384 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_70(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_70(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} { | |
%cst = arith.constant 9.765625E-4 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8> | |
%1 = tensor.empty() : tensor<1x1x64x384xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x64x384xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 192x1x1x32 -> 1x1x32x192 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_66(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_66(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8> | |
%1 = tensor.empty() : tensor<1x1x32x192xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x32x192xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 9.765625E-4 = 2^-10)
// with leading dim moved last: 576x1x1x96 -> 1x1x96x576 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_78(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_78(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} { | |
%cst = arith.constant 9.765625E-4 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8> | |
%1 = tensor.empty() : tensor<1x1x96x576xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x96x576xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 96x1x1x576 -> 1x1x576x96 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_77(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_77(%input0: tensor<96x1x1x576xi8>) -> (%output0: tensor<1x1x576x96xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x576xi8> | |
%1 = tensor.empty() : tensor<1x1x576x96xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x576xi8>) outs(%1 : tensor<1x1x576x96xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x576x96xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x96xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 576x1x1x96 -> 1x1x96x576 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_80(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_80(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8> | |
%1 = tensor.empty() : tensor<1x1x96x576xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x96x576xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 96x1x1x576 -> 1x1x576x96 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_79(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_79(%input0: tensor<96x1x1x576xi8>) -> (%output0: tensor<1x1x576x96xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x576xi8> | |
%1 = tensor.empty() : tensor<1x1x576x96xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x576xi8>) outs(%1 : tensor<1x1x576x96xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x576x96xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x96xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 64x1x1x384 -> 1x1x384x64 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_69(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_69(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8> | |
%1 = tensor.empty() : tensor<1x1x384x64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x384x64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 3.906250e-03 = 2^-8)
// with leading dim moved last: 160x1x1x576 -> 1x1x576x160 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_81(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_81(%input0: tensor<160x1x1x576xi8>) -> (%output0: tensor<1x1x576x160xf32>)"}} { | |
%cst = arith.constant 3.906250e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x576xi8> | |
%1 = tensor.empty() : tensor<1x1x576x160xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x576xi8>) outs(%1 : tensor<1x1x576x160xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x576x160xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x160xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 576x1x1x96 -> 1x1x96x576 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_76(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_76(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8> | |
%1 = tensor.empty() : tensor<1x1x96x576xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x96x576xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 9.765625E-4 = 2^-10)
// with leading dim moved last: 384x1x1x64 -> 1x1x64x384 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_72(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_72(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} { | |
%cst = arith.constant 9.765625E-4 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8> | |
%1 = tensor.empty() : tensor<1x1x64x384xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x64x384xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 96x1x1x384 -> 1x1x384x96 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_75(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_75(%input0: tensor<96x1x1x384xi8>) -> (%output0: tensor<1x1x384x96xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x384xi8> | |
%1 = tensor.empty() : tensor<1x1x384x96xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x384xi8>) outs(%1 : tensor<1x1x384x96xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x384x96xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x96xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 384x1x1x64 -> 1x1x64x384 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_74(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_74(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8> | |
%1 = tensor.empty() : tensor<1x1x64x384xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x64x384xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 160x1x1x960 -> 1x1x960x160 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_83(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_83(%input0: tensor<160x1x1x960xi8>) -> (%output0: tensor<1x1x960x160xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x960xi8> | |
%1 = tensor.empty() : tensor<1x1x960x160xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x960xi8>) outs(%1 : tensor<1x1x960x160xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x960x160xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x160xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 64x1x1x384 -> 1x1x384x64 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_73(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_73(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8> | |
%1 = tensor.empty() : tensor<1x1x384x64xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x384x64xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 9.765625E-4 = 2^-10)
// with leading dim moved last: 960x1x1x160 -> 1x1x160x960 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_84(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_84(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} { | |
%cst = arith.constant 9.765625E-4 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8> | |
%1 = tensor.empty() : tensor<1x1x160x960xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x160x960xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 960x1x1x160 -> 1x1x160x960 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_82(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_82(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8> | |
%1 = tensor.empty() : tensor<1x1x160x960xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x160x960xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 7.812500e-03 = 2^-7)
// with leading dim moved last: 160x1x1x960 -> 1x1x960x160 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_85(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_85(%input0: tensor<160x1x1x960xi8>) -> (%output0: tensor<1x1x960x160xf32>)"}} { | |
%cst = arith.constant 7.812500e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x960xi8> | |
%1 = tensor.empty() : tensor<1x1x960x160xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x960xi8>) outs(%1 : tensor<1x1x960x160xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x960x160xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x160xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 960x1x1x160 -> 1x1x160x960 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_86(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_86(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8> | |
%1 = tensor.empty() : tensor<1x1x160x960xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x160x960xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 3.906250e-03 = 2^-8)
// with leading dim moved last: 320x1x1x960 -> 1x1x960x320 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_87(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_87(%input0: tensor<320x1x1x960xi8>) -> (%output0: tensor<1x1x960x320xf32>)"}} { | |
%cst = arith.constant 3.906250e-03 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<320x1x1x960xi8> | |
%1 = tensor.empty() : tensor<1x1x960x320xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<320x1x1x960xi8>) outs(%1 : tensor<1x1x960x320xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x960x320xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x320xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Const-eval dispatch: dequantize i8 -> f32 (extsi, sitofp, * 0.001953125 = 2^-9)
// with leading dim moved last: 256x1x1x320 -> 1x1x320x256 (likely OHWI -> HWIO — verify).
util.func public @jit_eval_89(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_89(%input0: tensor<256x1x1x320xi8>) -> (%output0: tensor<1x1x320x256xf32>)"}} { | |
%cst = arith.constant 0.001953125 : f32 | |
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x320xi8> | |
%1 = tensor.empty() : tensor<1x1x320x256xf32> | |
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x320xi8>) outs(%1 : tensor<1x1x320x256xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%4 = arith.extsi %in : i8 to i32 | |
%5 = arith.sitofp %4 : i32 to f32 | |
%6 = arith.mulf %5, %cst : f32 | |
linalg.yield %6 : f32 | |
} -> tensor<1x1x320x256xf32> | |
%3 = hal.tensor.export %2 "output0" : tensor<1x1x320x256xf32> -> !hal.buffer_view | |
util.return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Constant-folding dispatch: dequantize + transpose.
// Same shape as @jit_eval_89 (256x1x1x320 i8 -> 1x1x320x256 f32) but with a
// different quantization scale: 9.765625E-4 (= 2^-10, i.e. 1/1024).
util.func public @jit_eval_88(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_88(%input0: tensor<256x1x1x320xi8>) -> (%output0: tensor<1x1x320x256xf32>)"}} {
// Quantization scale: 9.765625E-4 == 1/1024.
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x320xi8>
%1 = tensor.empty() : tensor<1x1x320x256xf32>
// Per the indexing maps: out[d0,d1,d2,d3] = dequant(in[d3,d0,d1,d2]).
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x320xi8>) outs(%1 : tensor<1x1x320x256xf32>) {
^bb0(%in: i8, %out: f32):
// Dequantize one element: sign-extend i8 -> i32, convert to f32, apply scale.
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x320x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x320x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Constant-folding dispatch: dequantize + transpose.
// Converts int8 weights of shape 21x1x1x256 into f32 of shape 1x1x256x21,
// scaling by 3.906250e-03 (= 2^-8, i.e. 1/256).
util.func public @jit_eval_91(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_91(%input0: tensor<21x1x1x256xi8>) -> (%output0: tensor<1x1x256x21xf32>)"}} {
// Quantization scale: 3.906250e-03 == 1/256.
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<21x1x1x256xi8>
%1 = tensor.empty() : tensor<1x1x256x21xf32>
// Per the indexing maps: out[d0,d1,d2,d3] = dequant(in[d3,d0,d1,d2]).
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<21x1x1x256xi8>) outs(%1 : tensor<1x1x256x21xf32>) {
^bb0(%in: i8, %out: f32):
// Dequantize one element: sign-extend i8 -> i32, convert to f32, apply scale.
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x256x21xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x256x21xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Constant-folding dispatch: dequantize + transpose.
// Converts int8 weights of shape 256x1x1x512 into f32 of shape 1x1x512x256,
// scaling by 0.001953125 (= 2^-9, i.e. 1/512).
util.func public @jit_eval_90(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_90(%input0: tensor<256x1x1x512xi8>) -> (%output0: tensor<1x1x512x256xf32>)"}} {
// Quantization scale: 0.001953125 == 1/512.
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x512xi8>
%1 = tensor.empty() : tensor<1x1x512x256xf32>
// Per the indexing maps: out[d0,d1,d2,d3] = dequant(in[d3,d0,d1,d2]).
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x512xi8>) outs(%1 : tensor<1x1x512x256xf32>) {
^bb0(%in: i8, %out: f32):
// Dequantize one element: sign-extend i8 -> i32, convert to f32, apply scale.
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x512x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x512x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
// Synchronous wrapper around @tf2onnx$async (coarse-fences ABI model):
// creates a signal fence on device 0, invokes the async entry point with a
// null wait fence, then blocks on the signal fence before returning.
util.func public @tf2onnx(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
// -1 timeout => wait on the fence indefinitely.
%c-1_i32 = arith.constant -1 : i32
%c0 = arith.constant 0 : index
%device_0 = hal.devices.get %c0 : !hal.device
// Null wait fence: the async call has no upstream work to wait on.
%0 = util.null : !hal.fence
%fence = hal.fence.create device(%device_0 : !hal.device) flags("None") : !hal.fence
%1 = util.call @tf2onnx$async(%arg0, %0, %fence) : (!hal.buffer_view, !hal.fence, !hal.fence) -> !hal.buffer_view
// NOTE(review): the await status code (%status) is produced but unused here.
%status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32
util.return %1 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- // | |
util.func public @tf2onnx$async(%arg0: !hal.buffer_view, %arg1: !hal.fence, %arg2: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} { | |
%cst = arith.constant dense_resource<__elided__> : tensor<1x21xf32> | |
%cst_0 = arith.constant dense_resource<__elided__> : tensor<1x256x1x1xf32> | |
%cst_1 = arith.constant 0.999259948 : f32 | |
%cst_2 = arith.constant 6.000000e+00 : f32 | |
%cst_3 = arith.constant 6.400000e+01 : f32 | |
%cst_4 = arith.constant -9.53674316E-7 : f32 | |
%cst_5 = arith.constant 7.89230776 : f32 | |
%cst_6 = arith.constant 6.500000e+01 : f32 | |
%cst_7 = arith.constant 7.812500e-03 : f32 | |
%cst_8 = arith.constant 1.250000e-01 : f32 | |
%cst_9 = arith.constant 2.500000e-01 : f32 | |
%cst_10 = arith.constant 6.250000e-02 : f32 | |
%cst_11 = arith.constant 3.125000e-02 : f32 | |
%cst_12 = arith.constant 5.000000e-01 : f32 | |
%cst_13 = arith.constant 1.000000e+00 : f32 | |
%cst_14 = arith.constant 0.000000e+00 : f32 | |
%cst_15 = arith.constant 1.270000e+02 : f32 | |
%cst_16 = arith.constant -1.280000e+02 : f32 | |
%cst_17 = arith.constant 4.225000e+03 : f32 | |
%c0 = arith.constant 0 : index | |
%cst_18 = arith.constant dense_resource<__elided__> : tensor<96xf32> | |
%cst_19 = arith.constant dense_resource<__elided__> : tensor<576xf32> | |
%cst_20 = arith.constant dense_resource<__elided__> : tensor<960xf32> | |
%cst_21 = arith.constant dense_resource<__elided__> : tensor<144xf32> | |
%cst_22 = arith.constant dense_resource<__elided__> : tensor<192xf32> | |
%cst_23 = arith.constant dense_resource<__elided__> : tensor<384xf32> | |
%cst_24 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_25 = arith.constant dense_resource<__elided__> : tensor<32xf32> | |
%cst_26 = arith.constant dense_resource<__elided__> : tensor<3x3x3x32xf32> | |
%cst_27 = arith.constant dense_resource<__elided__> : tensor<32x3x3xf32> | |
%cst_28 = arith.constant dense_resource<__elided__> : tensor<16xf32> | |
%cst_29 = arith.constant dense_resource<__elided__> : tensor<96x3x3xf32> | |
%cst_30 = arith.constant dense_resource<__elided__> : tensor<24xf32> | |
%cst_31 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32> | |
%cst_32 = arith.constant dense_resource<__elided__> : tensor<384xf32> | |
%cst_33 = arith.constant dense_resource<__elided__> : tensor<96xf32> | |
%cst_34 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32> | |
%cst_35 = arith.constant dense_resource<__elided__> : tensor<96xf32> | |
%cst_36 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32> | |
%cst_37 = arith.constant dense_resource<__elided__> : tensor<576xf32> | |
%cst_38 = arith.constant dense_resource<__elided__> : tensor<96xf32> | |
%cst_39 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32> | |
%cst_40 = arith.constant dense_resource<__elided__> : tensor<576xf32> | |
%cst_41 = arith.constant dense_resource<__elided__> : tensor<160xf32> | |
%cst_42 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32> | |
%cst_43 = arith.constant dense_resource<__elided__> : tensor<160xf32> | |
%cst_44 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32> | |
%cst_45 = arith.constant dense_resource<__elided__> : tensor<960xf32> | |
%cst_46 = arith.constant dense_resource<__elided__> : tensor<160xf32> | |
%cst_47 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32> | |
%cst_48 = arith.constant dense_resource<__elided__> : tensor<960xf32> | |
%cst_49 = arith.constant dense_resource<__elided__> : tensor<320xf32> | |
%cst_50 = arith.constant dense_resource<__elided__> : tensor<144x3x3xf32> | |
%cst_51 = arith.constant dense_resource<__elided__> : tensor<24xf32> | |
%cst_52 = arith.constant dense_resource<__elided__> : tensor<144x3x3xf32> | |
%cst_53 = arith.constant dense_resource<__elided__> : tensor<144xf32> | |
%cst_54 = arith.constant dense_resource<__elided__> : tensor<32xf32> | |
%cst_55 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32> | |
%cst_56 = arith.constant dense_resource<__elided__> : tensor<32xf32> | |
%cst_57 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32> | |
%cst_58 = arith.constant dense_resource<__elided__> : tensor<192xf32> | |
%cst_59 = arith.constant dense_resource<__elided__> : tensor<32xf32> | |
%cst_60 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32> | |
%cst_61 = arith.constant dense_resource<__elided__> : tensor<192xf32> | |
%cst_62 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_63 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32> | |
%cst_64 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_65 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32> | |
%cst_66 = arith.constant dense_resource<__elided__> : tensor<384xf32> | |
%cst_67 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_68 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32> | |
%cst_69 = arith.constant dense_resource<__elided__> : tensor<384xf32> | |
%cst_70 = arith.constant dense_resource<__elided__> : tensor<64xf32> | |
%cst_71 = arith.constant dense_resource<__elided__> : tensor<256xf32> | |
%cst_72 = arith.constant dense_resource<__elided__> : tensor<1x1x32x16xf32> | |
%cst_73 = arith.constant dense_resource<__elided__> : tensor<1x1x16x96xf32> | |
%cst_74 = arith.constant dense_resource<__elided__> : tensor<1x1x96x24xf32> | |
%cst_75 = arith.constant dense_resource<__elided__> : tensor<1x1x24x144xf32> | |
%cst_76 = arith.constant dense_resource<__elided__> : tensor<1x1x144x24xf32> | |
%cst_77 = arith.constant dense_resource<__elided__> : tensor<1x1x24x144xf32> | |
%cst_78 = arith.constant dense_resource<__elided__> : tensor<1x1x144x32xf32> | |
%cst_79 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32> | |
%cst_80 = arith.constant dense_resource<__elided__> : tensor<1x1x192x32xf32> | |
%cst_81 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32> | |
%cst_82 = arith.constant dense_resource<__elided__> : tensor<1x1x192x32xf32> | |
%cst_83 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32> | |
%cst_84 = arith.constant dense_resource<__elided__> : tensor<1x1x192x64xf32> | |
%cst_85 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32> | |
%cst_86 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32> | |
%cst_87 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32> | |
%cst_88 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32> | |
%cst_89 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32> | |
%cst_90 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32> | |
%cst_91 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32> | |
%cst_92 = arith.constant dense_resource<__elided__> : tensor<1x1x384x96xf32> | |
%cst_93 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32> | |
%cst_94 = arith.constant dense_resource<__elided__> : tensor<1x1x576x96xf32> | |
%cst_95 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32> | |
%cst_96 = arith.constant dense_resource<__elided__> : tensor<1x1x576x96xf32> | |
%cst_97 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32> | |
%cst_98 = arith.constant dense_resource<__elided__> : tensor<1x1x576x160xf32> | |
%cst_99 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32> | |
%cst_100 = arith.constant dense_resource<__elided__> : tensor<1x1x960x160xf32> | |
%cst_101 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32> | |
%cst_102 = arith.constant dense_resource<__elided__> : tensor<1x1x960x160xf32> | |
%cst_103 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32> | |
%cst_104 = arith.constant dense_resource<__elided__> : tensor<1x1x960x320xf32> | |
%cst_105 = arith.constant dense_resource<__elided__> : tensor<1x1x320x256xf32> | |
%cst_106 = arith.constant dense_resource<__elided__> : tensor<1x1x320x256xf32> | |
%cst_107 = arith.constant dense_resource<__elided__> : tensor<1x1x512x256xf32> | |
%cst_108 = arith.constant dense_resource<__elided__> : tensor<1x1x256x21xf32> | |
%0 = hal.tensor.import wait(%arg1) => %arg0 : !hal.buffer_view -> tensor<1x513x513x3xf32> | |
%expanded = tensor.expand_shape %0 [[0], [1], [2], [3, 4]] output_shape [1, 513, 513, 1, 3] : tensor<1x513x513x3xf32> into tensor<1x513x513x1x3xf32> | |
%1 = tensor.empty() : tensor<1x513x513x1x3xf32> | |
%2 = flow.dispatch.region -> (tensor<1x513x513x1x3xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expanded : tensor<1x513x513x1x3xf32>) outs(%1 : tensor<1x513x513x1x3xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_7 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_7 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x513x513x1x3xf32> | |
flow.return %237 : tensor<1x513x513x1x3xf32> | |
} | |
%collapsed = tensor.collapse_shape %2 [[0, 1], [2, 3], [4]] : tensor<1x513x513x1x3xf32> into tensor<513x513x3xf32> | |
%3 = tensor.empty() : tensor<515x515x3xf32> | |
%4 = linalg.fill ins(%cst_14 : f32) outs(%3 : tensor<515x515x3xf32>) -> tensor<515x515x3xf32> | |
%inserted_slice = tensor.insert_slice %collapsed into %4[1, 1, 0] [513, 513, 3] [1, 1, 1] : tensor<513x513x3xf32> into tensor<515x515x3xf32> | |
%expanded_109 = tensor.expand_shape %inserted_slice [[0, 1], [2], [3]] output_shape [1, 515, 515, 3] : tensor<515x515x3xf32> into tensor<1x515x515x3xf32> | |
%5 = tensor.empty() : tensor<1x257x257x32xf32> | |
%6 = linalg.fill ins(%cst_14 : f32) outs(%5 : tensor<1x257x257x32xf32>) -> tensor<1x257x257x32xf32> | |
%7 = flow.dispatch.region -> (tensor<1x257x257x32xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%expanded_109, %cst_26 : tensor<1x515x515x3xf32>, tensor<3x3x3x32xf32>) outs(%6 : tensor<1x257x257x32xf32>) -> tensor<1x257x257x32xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_25 : tensor<1x257x257x32xf32>, tensor<32xf32>) outs(%5 : tensor<1x257x257x32xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x257x257x32xf32> | |
flow.return %238 : tensor<1x257x257x32xf32> | |
} | |
%collapsed_110 = tensor.collapse_shape %7 [[0, 1], [2], [3]] : tensor<1x257x257x32xf32> into tensor<257x257x32xf32> | |
%8 = tensor.empty() : tensor<259x259x32xf32> | |
%9 = linalg.fill ins(%cst_14 : f32) outs(%8 : tensor<259x259x32xf32>) -> tensor<259x259x32xf32> | |
%inserted_slice_111 = tensor.insert_slice %collapsed_110 into %9[1, 1, 0] [257, 257, 32] [1, 1, 1] : tensor<257x257x32xf32> into tensor<259x259x32xf32> | |
%expanded_112 = tensor.expand_shape %inserted_slice_111 [[0], [1], [2, 3]] output_shape [259, 259, 1, 32] : tensor<259x259x32xf32> into tensor<259x259x1x32xf32> | |
%10 = tensor.empty() : tensor<1x32x259x259xf32> | |
%11 = flow.dispatch.region -> (tensor<1x32x259x259xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_112 : tensor<259x259x1x32xf32>) outs(%10 : tensor<1x32x259x259xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x32x259x259xf32> | |
flow.return %237 : tensor<1x32x259x259xf32> | |
} | |
%12 = tensor.empty() : tensor<1x32x257x257xf32> | |
%13 = linalg.fill ins(%cst_14 : f32) outs(%12 : tensor<1x32x257x257xf32>) -> tensor<1x32x257x257xf32> | |
%14 = flow.dispatch.region -> (tensor<1x32x257x257xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_27 : tensor<1x32x259x259xf32>, tensor<32x3x3xf32>) outs(%13 : tensor<1x32x257x257xf32>) -> tensor<1x32x257x257xf32> | |
flow.return %237 : tensor<1x32x257x257xf32> | |
} | |
%15 = tensor.empty() : tensor<257x257x1x32xf32> | |
%16 = flow.dispatch.region -> (tensor<257x257x1x32xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<1x32x257x257xf32>) outs(%15 : tensor<257x257x1x32xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<257x257x1x32xf32> | |
flow.return %237 : tensor<257x257x1x32xf32> | |
} | |
%collapsed_113 = tensor.collapse_shape %16 [[0], [1], [2, 3]] : tensor<257x257x1x32xf32> into tensor<257x257x32xf32> | |
%expanded_114 = tensor.expand_shape %collapsed_113 [[0, 1], [2], [3]] output_shape [1, 257, 257, 32] : tensor<257x257x32xf32> into tensor<1x257x257x32xf32> | |
%17 = flow.dispatch.region -> (tensor<1x257x257x32xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_114 : tensor<1x257x257x32xf32>) outs(%5 : tensor<1x257x257x32xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x257x257x32xf32> | |
flow.return %237 : tensor<1x257x257x32xf32> | |
} | |
%18 = tensor.empty() : tensor<1x257x257x16xf32> | |
%19 = linalg.fill ins(%cst_14 : f32) outs(%18 : tensor<1x257x257x16xf32>) -> tensor<1x257x257x16xf32> | |
%20 = flow.dispatch.region -> (tensor<1x257x257x16xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%17, %cst_72 : tensor<1x257x257x32xf32>, tensor<1x1x32x16xf32>) outs(%19 : tensor<1x257x257x16xf32>) -> tensor<1x257x257x16xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_28 : tensor<1x257x257x16xf32>, tensor<16xf32>) outs(%18 : tensor<1x257x257x16xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_9 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_9 : f32 | |
linalg.yield %250 : f32 | |
} -> tensor<1x257x257x16xf32> | |
flow.return %238 : tensor<1x257x257x16xf32> | |
} | |
%21 = tensor.empty() : tensor<1x257x257x96xf32> | |
%22 = linalg.fill ins(%cst_14 : f32) outs(%21 : tensor<1x257x257x96xf32>) -> tensor<1x257x257x96xf32> | |
%23 = flow.dispatch.region -> (tensor<1x257x257x96xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%20, %cst_73 : tensor<1x257x257x16xf32>, tensor<1x1x16x96xf32>) outs(%22 : tensor<1x257x257x96xf32>) -> tensor<1x257x257x96xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_18 : tensor<1x257x257x96xf32>, tensor<96xf32>) outs(%21 : tensor<1x257x257x96xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x257x257x96xf32> | |
flow.return %238 : tensor<1x257x257x96xf32> | |
} | |
%collapsed_115 = tensor.collapse_shape %23 [[0, 1], [2], [3]] : tensor<1x257x257x96xf32> into tensor<257x257x96xf32> | |
%24 = tensor.empty() : tensor<259x259x96xf32> | |
%25 = linalg.fill ins(%cst_14 : f32) outs(%24 : tensor<259x259x96xf32>) -> tensor<259x259x96xf32> | |
%inserted_slice_116 = tensor.insert_slice %collapsed_115 into %25[1, 1, 0] [257, 257, 96] [1, 1, 1] : tensor<257x257x96xf32> into tensor<259x259x96xf32> | |
%expanded_117 = tensor.expand_shape %inserted_slice_116 [[0], [1], [2, 3]] output_shape [259, 259, 1, 96] : tensor<259x259x96xf32> into tensor<259x259x1x96xf32> | |
%26 = tensor.empty() : tensor<1x96x259x259xf32> | |
%27 = flow.dispatch.region -> (tensor<1x96x259x259xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_117 : tensor<259x259x1x96xf32>) outs(%26 : tensor<1x96x259x259xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x96x259x259xf32> | |
flow.return %237 : tensor<1x96x259x259xf32> | |
} | |
%28 = tensor.empty() : tensor<1x96x129x129xf32> | |
%29 = linalg.fill ins(%cst_14 : f32) outs(%28 : tensor<1x96x129x129xf32>) -> tensor<1x96x129x129xf32> | |
%30 = flow.dispatch.region -> (tensor<1x96x129x129xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%27, %cst_29 : tensor<1x96x259x259xf32>, tensor<96x3x3xf32>) outs(%29 : tensor<1x96x129x129xf32>) -> tensor<1x96x129x129xf32> | |
flow.return %237 : tensor<1x96x129x129xf32> | |
} | |
%31 = tensor.empty() : tensor<129x129x1x96xf32> | |
%32 = flow.dispatch.region -> (tensor<129x129x1x96xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%30 : tensor<1x96x129x129xf32>) outs(%31 : tensor<129x129x1x96xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<129x129x1x96xf32> | |
flow.return %237 : tensor<129x129x1x96xf32> | |
} | |
%collapsed_118 = tensor.collapse_shape %32 [[0], [1], [2, 3]] : tensor<129x129x1x96xf32> into tensor<129x129x96xf32> | |
%expanded_119 = tensor.expand_shape %collapsed_118 [[0, 1], [2], [3]] output_shape [1, 129, 129, 96] : tensor<129x129x96xf32> into tensor<1x129x129x96xf32> | |
%33 = tensor.empty() : tensor<1x129x129x96xf32> | |
%34 = flow.dispatch.region -> (tensor<1x129x129x96xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_119 : tensor<1x129x129x96xf32>) outs(%33 : tensor<1x129x129x96xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x129x129x96xf32> | |
flow.return %237 : tensor<1x129x129x96xf32> | |
} | |
%35 = tensor.empty() : tensor<129x129x24xf32> | |
%36 = tensor.empty() : tensor<1x129x129x24xf32> | |
%37 = linalg.fill ins(%cst_14 : f32) outs(%36 : tensor<1x129x129x24xf32>) -> tensor<1x129x129x24xf32> | |
%38 = tensor.empty() : tensor<1x129x129x24xi8> | |
%39 = flow.dispatch.region -> (tensor<1x129x129x24xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%34, %cst_74 : tensor<1x129x129x96xf32>, tensor<1x1x96x24xf32>) outs(%37 : tensor<1x129x129x24xf32>) -> tensor<1x129x129x24xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_30 : tensor<1x129x129x24xf32>, tensor<24xf32>) outs(%38 : tensor<1x129x129x24xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_9 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
linalg.yield %247 : i8 | |
} -> tensor<1x129x129x24xi8> | |
flow.return %238 : tensor<1x129x129x24xi8> | |
} | |
%collapsed_120 = tensor.collapse_shape %39 [[0, 1], [2], [3]] : tensor<1x129x129x24xi8> into tensor<129x129x24xi8> | |
%40 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_120 : tensor<129x129x24xi8>) outs(%35 : tensor<129x129x24xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_9 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<129x129x24xf32> | |
%expanded_121 = tensor.expand_shape %40 [[0, 1], [2], [3]] output_shape [1, 129, 129, 24] : tensor<129x129x24xf32> into tensor<1x129x129x24xf32> | |
%41 = tensor.empty() : tensor<1x129x129x144xf32> | |
%42 = linalg.fill ins(%cst_14 : f32) outs(%41 : tensor<1x129x129x144xf32>) -> tensor<1x129x129x144xf32> | |
%43 = flow.dispatch.region -> (tensor<1x129x129x144xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_121, %cst_75 : tensor<1x129x129x24xf32>, tensor<1x1x24x144xf32>) outs(%42 : tensor<1x129x129x144xf32>) -> tensor<1x129x129x144xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_21 : tensor<1x129x129x144xf32>, tensor<144xf32>) outs(%41 : tensor<1x129x129x144xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x129x129x144xf32> | |
flow.return %238 : tensor<1x129x129x144xf32> | |
} | |
%collapsed_122 = tensor.collapse_shape %43 [[0, 1], [2], [3]] : tensor<1x129x129x144xf32> into tensor<129x129x144xf32> | |
%44 = tensor.empty() : tensor<131x131x144xf32> | |
%45 = linalg.fill ins(%cst_14 : f32) outs(%44 : tensor<131x131x144xf32>) -> tensor<131x131x144xf32> | |
%inserted_slice_123 = tensor.insert_slice %collapsed_122 into %45[1, 1, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<131x131x144xf32> | |
%expanded_124 = tensor.expand_shape %inserted_slice_123 [[0], [1], [2, 3]] output_shape [131, 131, 1, 144] : tensor<131x131x144xf32> into tensor<131x131x1x144xf32> | |
%46 = tensor.empty() : tensor<1x144x131x131xf32> | |
%47 = flow.dispatch.region -> (tensor<1x144x131x131xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_124 : tensor<131x131x1x144xf32>) outs(%46 : tensor<1x144x131x131xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x144x131x131xf32> | |
flow.return %237 : tensor<1x144x131x131xf32> | |
} | |
%48 = tensor.empty() : tensor<1x144x129x129xf32> | |
%49 = linalg.fill ins(%cst_14 : f32) outs(%48 : tensor<1x144x129x129xf32>) -> tensor<1x144x129x129xf32> | |
%50 = flow.dispatch.region -> (tensor<1x144x129x129xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%47, %cst_50 : tensor<1x144x131x131xf32>, tensor<144x3x3xf32>) outs(%49 : tensor<1x144x129x129xf32>) -> tensor<1x144x129x129xf32> | |
flow.return %237 : tensor<1x144x129x129xf32> | |
} | |
%51 = tensor.empty() : tensor<129x129x1x144xf32> | |
%52 = flow.dispatch.region -> (tensor<129x129x1x144xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50 : tensor<1x144x129x129xf32>) outs(%51 : tensor<129x129x1x144xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<129x129x1x144xf32> | |
flow.return %237 : tensor<129x129x1x144xf32> | |
} | |
%collapsed_125 = tensor.collapse_shape %52 [[0], [1], [2, 3]] : tensor<129x129x1x144xf32> into tensor<129x129x144xf32> | |
%expanded_126 = tensor.expand_shape %collapsed_125 [[0, 1], [2], [3]] output_shape [1, 129, 129, 144] : tensor<129x129x144xf32> into tensor<1x129x129x144xf32> | |
%53 = flow.dispatch.region -> (tensor<1x129x129x144xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_126 : tensor<1x129x129x144xf32>) outs(%41 : tensor<1x129x129x144xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x129x129x144xf32> | |
flow.return %237 : tensor<1x129x129x144xf32> | |
} | |
%54 = flow.dispatch.region -> (tensor<1x129x129x24xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%53, %cst_76 : tensor<1x129x129x144xf32>, tensor<1x1x144x24xf32>) outs(%37 : tensor<1x129x129x24xf32>) -> tensor<1x129x129x24xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_121, %237, %cst_51 : tensor<1x129x129x24xf32>, tensor<1x129x129x24xf32>, tensor<24xf32>) outs(%36 : tensor<1x129x129x24xf32>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_9 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_9 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_12 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
%260 = arith.extsi %259 : i8 to i32 | |
%261 = arith.sitofp %260 : i32 to f32 | |
%262 = arith.mulf %261, %cst_12 : f32 | |
linalg.yield %262 : f32 | |
} -> tensor<1x129x129x24xf32> | |
flow.return %238 : tensor<1x129x129x24xf32> | |
} | |
%55 = flow.dispatch.region -> (tensor<1x129x129x144xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%54, %cst_77 : tensor<1x129x129x24xf32>, tensor<1x1x24x144xf32>) outs(%42 : tensor<1x129x129x144xf32>) -> tensor<1x129x129x144xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_53 : tensor<1x129x129x144xf32>, tensor<144xf32>) outs(%41 : tensor<1x129x129x144xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x129x129x144xf32> | |
flow.return %238 : tensor<1x129x129x144xf32> | |
} | |
%collapsed_127 = tensor.collapse_shape %55 [[0, 1], [2], [3]] : tensor<1x129x129x144xf32> into tensor<129x129x144xf32> | |
%inserted_slice_128 = tensor.insert_slice %collapsed_127 into %45[1, 1, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<131x131x144xf32> | |
%expanded_129 = tensor.expand_shape %inserted_slice_128 [[0], [1], [2, 3]] output_shape [131, 131, 1, 144] : tensor<131x131x144xf32> into tensor<131x131x1x144xf32> | |
%56 = flow.dispatch.region -> (tensor<1x144x131x131xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_129 : tensor<131x131x1x144xf32>) outs(%46 : tensor<1x144x131x131xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x144x131x131xf32> | |
flow.return %237 : tensor<1x144x131x131xf32> | |
} | |
%57 = tensor.empty() : tensor<1x144x65x65xf32> | |
%58 = linalg.fill ins(%cst_14 : f32) outs(%57 : tensor<1x144x65x65xf32>) -> tensor<1x144x65x65xf32> | |
%59 = flow.dispatch.region -> (tensor<1x144x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%56, %cst_52 : tensor<1x144x131x131xf32>, tensor<144x3x3xf32>) outs(%58 : tensor<1x144x65x65xf32>) -> tensor<1x144x65x65xf32> | |
flow.return %237 : tensor<1x144x65x65xf32> | |
} | |
%60 = tensor.empty() : tensor<65x65x1x144xf32> | |
%61 = flow.dispatch.region -> (tensor<65x65x1x144xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%59 : tensor<1x144x65x65xf32>) outs(%60 : tensor<65x65x1x144xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x144xf32> | |
flow.return %237 : tensor<65x65x1x144xf32> | |
} | |
%collapsed_130 = tensor.collapse_shape %61 [[0], [1], [2, 3]] : tensor<65x65x1x144xf32> into tensor<65x65x144xf32> | |
%expanded_131 = tensor.expand_shape %collapsed_130 [[0, 1], [2], [3]] output_shape [1, 65, 65, 144] : tensor<65x65x144xf32> into tensor<1x65x65x144xf32> | |
%62 = tensor.empty() : tensor<1x65x65x144xf32> | |
%63 = flow.dispatch.region -> (tensor<1x65x65x144xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_131 : tensor<1x65x65x144xf32>) outs(%62 : tensor<1x65x65x144xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x144xf32> | |
flow.return %237 : tensor<1x65x65x144xf32> | |
} | |
%64 = tensor.empty() : tensor<65x65x32xf32> | |
%65 = tensor.empty() : tensor<1x65x65x32xf32> | |
%66 = linalg.fill ins(%cst_14 : f32) outs(%65 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32> | |
%67 = tensor.empty() : tensor<1x65x65x32xi8> | |
%68 = flow.dispatch.region -> (tensor<1x65x65x32xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%63, %cst_78 : tensor<1x65x65x144xf32>, tensor<1x1x144x32xf32>) outs(%66 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_54 : tensor<1x65x65x32xf32>, tensor<32xf32>) outs(%67 : tensor<1x65x65x32xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_9 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
linalg.yield %247 : i8 | |
} -> tensor<1x65x65x32xi8> | |
flow.return %238 : tensor<1x65x65x32xi8> | |
} | |
%collapsed_132 = tensor.collapse_shape %68 [[0, 1], [2], [3]] : tensor<1x65x65x32xi8> into tensor<65x65x32xi8> | |
%69 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_132 : tensor<65x65x32xi8>) outs(%64 : tensor<65x65x32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_9 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x32xf32> | |
%expanded_133 = tensor.expand_shape %69 [[0, 1], [2], [3]] output_shape [1, 65, 65, 32] : tensor<65x65x32xf32> into tensor<1x65x65x32xf32> | |
%70 = tensor.empty() : tensor<1x65x65x192xf32> | |
%71 = linalg.fill ins(%cst_14 : f32) outs(%70 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32> | |
%72 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_133, %cst_79 : tensor<1x65x65x32xf32>, tensor<1x1x32x192xf32>) outs(%71 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_22 : tensor<1x65x65x192xf32>, tensor<192xf32>) outs(%70 : tensor<1x65x65x192xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x192xf32> | |
flow.return %238 : tensor<1x65x65x192xf32> | |
} | |
%collapsed_134 = tensor.collapse_shape %72 [[0, 1], [2], [3]] : tensor<1x65x65x192xf32> into tensor<65x65x192xf32> | |
%73 = tensor.empty() : tensor<67x67x192xf32> | |
%74 = linalg.fill ins(%cst_14 : f32) outs(%73 : tensor<67x67x192xf32>) -> tensor<67x67x192xf32> | |
%inserted_slice_135 = tensor.insert_slice %collapsed_134 into %74[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32> | |
%expanded_136 = tensor.expand_shape %inserted_slice_135 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32> | |
%75 = tensor.empty() : tensor<1x192x67x67xf32> | |
%76 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_136 : tensor<67x67x1x192xf32>) outs(%75 : tensor<1x192x67x67xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x192x67x67xf32> | |
flow.return %237 : tensor<1x192x67x67xf32> | |
} | |
%77 = tensor.empty() : tensor<1x192x65x65xf32> | |
%78 = linalg.fill ins(%cst_14 : f32) outs(%77 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32> | |
%79 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%76, %cst_55 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%78 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32> | |
flow.return %237 : tensor<1x192x65x65xf32> | |
} | |
%80 = tensor.empty() : tensor<65x65x1x192xf32> | |
%81 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%79 : tensor<1x192x65x65xf32>) outs(%80 : tensor<65x65x1x192xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x192xf32> | |
flow.return %237 : tensor<65x65x1x192xf32> | |
} | |
%collapsed_137 = tensor.collapse_shape %81 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32> | |
%expanded_138 = tensor.expand_shape %collapsed_137 [[0, 1], [2], [3]] output_shape [1, 65, 65, 192] : tensor<65x65x192xf32> into tensor<1x65x65x192xf32> | |
%82 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_138 : tensor<1x65x65x192xf32>) outs(%70 : tensor<1x65x65x192xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x192xf32> | |
flow.return %237 : tensor<1x65x65x192xf32> | |
} | |
%83 = flow.dispatch.region -> (tensor<1x65x65x32xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%82, %cst_80 : tensor<1x65x65x192xf32>, tensor<1x1x192x32xf32>) outs(%66 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_133, %237, %cst_56 : tensor<1x65x65x32xf32>, tensor<1x65x65x32xf32>, tensor<32xf32>) outs(%67 : tensor<1x65x65x32xi8>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_9 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_9 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_9 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
linalg.yield %259 : i8 | |
} -> tensor<1x65x65x32xi8> | |
flow.return %238 : tensor<1x65x65x32xi8> | |
} | |
%collapsed_139 = tensor.collapse_shape %83 [[0, 1], [2], [3]] : tensor<1x65x65x32xi8> into tensor<65x65x32xi8> | |
%84 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_139 : tensor<65x65x32xi8>) outs(%64 : tensor<65x65x32xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_9 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x32xf32> | |
%expanded_140 = tensor.expand_shape %84 [[0, 1], [2], [3]] output_shape [1, 65, 65, 32] : tensor<65x65x32xf32> into tensor<1x65x65x32xf32> | |
%85 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_140, %cst_81 : tensor<1x65x65x32xf32>, tensor<1x1x32x192xf32>) outs(%71 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_58 : tensor<1x65x65x192xf32>, tensor<192xf32>) outs(%70 : tensor<1x65x65x192xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x192xf32> | |
flow.return %238 : tensor<1x65x65x192xf32> | |
} | |
%collapsed_141 = tensor.collapse_shape %85 [[0, 1], [2], [3]] : tensor<1x65x65x192xf32> into tensor<65x65x192xf32> | |
%inserted_slice_142 = tensor.insert_slice %collapsed_141 into %74[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32> | |
%expanded_143 = tensor.expand_shape %inserted_slice_142 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32> | |
%86 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_143 : tensor<67x67x1x192xf32>) outs(%75 : tensor<1x192x67x67xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x192x67x67xf32> | |
flow.return %237 : tensor<1x192x67x67xf32> | |
} | |
%87 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%86, %cst_57 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%78 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32> | |
flow.return %237 : tensor<1x192x65x65xf32> | |
} | |
%88 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%87 : tensor<1x192x65x65xf32>) outs(%80 : tensor<65x65x1x192xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x192xf32> | |
flow.return %237 : tensor<65x65x1x192xf32> | |
} | |
%collapsed_144 = tensor.collapse_shape %88 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32> | |
%expanded_145 = tensor.expand_shape %collapsed_144 [[0, 1], [2], [3]] output_shape [1, 65, 65, 192] : tensor<65x65x192xf32> into tensor<1x65x65x192xf32> | |
%89 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_145 : tensor<1x65x65x192xf32>) outs(%70 : tensor<1x65x65x192xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x192xf32> | |
flow.return %237 : tensor<1x65x65x192xf32> | |
} | |
%90 = flow.dispatch.region -> (tensor<1x65x65x32xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%89, %cst_82 : tensor<1x65x65x192xf32>, tensor<1x1x192x32xf32>) outs(%66 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_140, %237, %cst_59 : tensor<1x65x65x32xf32>, tensor<1x65x65x32xf32>, tensor<32xf32>) outs(%65 : tensor<1x65x65x32xf32>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_9 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_9 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_9 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
%260 = arith.extsi %259 : i8 to i32 | |
%261 = arith.sitofp %260 : i32 to f32 | |
%262 = arith.mulf %261, %cst_9 : f32 | |
linalg.yield %262 : f32 | |
} -> tensor<1x65x65x32xf32> | |
flow.return %238 : tensor<1x65x65x32xf32> | |
} | |
%91 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%90, %cst_83 : tensor<1x65x65x32xf32>, tensor<1x1x32x192xf32>) outs(%71 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_61 : tensor<1x65x65x192xf32>, tensor<192xf32>) outs(%70 : tensor<1x65x65x192xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x192xf32> | |
flow.return %238 : tensor<1x65x65x192xf32> | |
} | |
%collapsed_146 = tensor.collapse_shape %91 [[0, 1], [2], [3]] : tensor<1x65x65x192xf32> into tensor<65x65x192xf32> | |
%inserted_slice_147 = tensor.insert_slice %collapsed_146 into %74[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32> | |
%expanded_148 = tensor.expand_shape %inserted_slice_147 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32> | |
%92 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_148 : tensor<67x67x1x192xf32>) outs(%75 : tensor<1x192x67x67xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x192x67x67xf32> | |
flow.return %237 : tensor<1x192x67x67xf32> | |
} | |
%93 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%92, %cst_60 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%78 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32> | |
flow.return %237 : tensor<1x192x65x65xf32> | |
} | |
%94 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%93 : tensor<1x192x65x65xf32>) outs(%80 : tensor<65x65x1x192xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x192xf32> | |
flow.return %237 : tensor<65x65x1x192xf32> | |
} | |
%collapsed_149 = tensor.collapse_shape %94 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32> | |
%expanded_150 = tensor.expand_shape %collapsed_149 [[0, 1], [2], [3]] output_shape [1, 65, 65, 192] : tensor<65x65x192xf32> into tensor<1x65x65x192xf32> | |
%95 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_150 : tensor<1x65x65x192xf32>) outs(%70 : tensor<1x65x65x192xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x192xf32> | |
flow.return %237 : tensor<1x65x65x192xf32> | |
} | |
%96 = tensor.empty() : tensor<65x65x64xf32> | |
%97 = tensor.empty() : tensor<1x65x65x64xf32> | |
%98 = linalg.fill ins(%cst_14 : f32) outs(%97 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32> | |
%99 = tensor.empty() : tensor<1x65x65x64xi8> | |
%100 = flow.dispatch.region -> (tensor<1x65x65x64xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%95, %cst_84 : tensor<1x65x65x192xf32>, tensor<1x1x192x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_62 : tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%99 : tensor<1x65x65x64xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
linalg.yield %247 : i8 | |
} -> tensor<1x65x65x64xi8> | |
flow.return %238 : tensor<1x65x65x64xi8> | |
} | |
%collapsed_151 = tensor.collapse_shape %100 [[0, 1], [2], [3]] : tensor<1x65x65x64xi8> into tensor<65x65x64xi8> | |
%101 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_151 : tensor<65x65x64xi8>) outs(%96 : tensor<65x65x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_8 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x64xf32> | |
%expanded_152 = tensor.expand_shape %101 [[0, 1], [2], [3]] output_shape [1, 65, 65, 64] : tensor<65x65x64xf32> into tensor<1x65x65x64xf32> | |
%102 = tensor.empty() : tensor<1x65x65x384xf32> | |
%103 = linalg.fill ins(%cst_14 : f32) outs(%102 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32> | |
%104 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_152, %cst_85 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_23 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %238 : tensor<1x65x65x384xf32> | |
} | |
%collapsed_153 = tensor.collapse_shape %104 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32> | |
%105 = tensor.empty() : tensor<69x69x384xf32> | |
%106 = linalg.fill ins(%cst_14 : f32) outs(%105 : tensor<69x69x384xf32>) -> tensor<69x69x384xf32> | |
%inserted_slice_154 = tensor.insert_slice %collapsed_153 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32> | |
%expanded_155 = tensor.expand_shape %inserted_slice_154 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32> | |
%107 = tensor.empty() : tensor<1x384x69x69xf32> | |
%108 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_155 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x384x69x69xf32> | |
flow.return %237 : tensor<1x384x69x69xf32> | |
} | |
%109 = tensor.empty() : tensor<1x384x65x65xf32> | |
%110 = linalg.fill ins(%cst_14 : f32) outs(%109 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32> | |
%111 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%108, %cst_63 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32> | |
flow.return %237 : tensor<1x384x65x65xf32> | |
} | |
%112 = tensor.empty() : tensor<65x65x1x384xf32> | |
%113 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%111 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x384xf32> | |
flow.return %237 : tensor<65x65x1x384xf32> | |
} | |
%collapsed_156 = tensor.collapse_shape %113 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32> | |
%expanded_157 = tensor.expand_shape %collapsed_156 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32> | |
%114 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_157 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %237 : tensor<1x65x65x384xf32> | |
} | |
%115 = flow.dispatch.region -> (tensor<1x65x65x64xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%114, %cst_86 : tensor<1x65x65x384xf32>, tensor<1x1x384x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_152, %237, %cst_64 : tensor<1x65x65x64xf32>, tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%99 : tensor<1x65x65x64xi8>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_9 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
linalg.yield %259 : i8 | |
} -> tensor<1x65x65x64xi8> | |
flow.return %238 : tensor<1x65x65x64xi8> | |
} | |
%collapsed_158 = tensor.collapse_shape %115 [[0, 1], [2], [3]] : tensor<1x65x65x64xi8> into tensor<65x65x64xi8> | |
%116 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_158 : tensor<65x65x64xi8>) outs(%96 : tensor<65x65x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_9 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x64xf32> | |
%expanded_159 = tensor.expand_shape %116 [[0, 1], [2], [3]] output_shape [1, 65, 65, 64] : tensor<65x65x64xf32> into tensor<1x65x65x64xf32> | |
%117 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_159, %cst_87 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_66 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %238 : tensor<1x65x65x384xf32> | |
} | |
%collapsed_160 = tensor.collapse_shape %117 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32> | |
%inserted_slice_161 = tensor.insert_slice %collapsed_160 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32> | |
%expanded_162 = tensor.expand_shape %inserted_slice_161 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32> | |
%118 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_162 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x384x69x69xf32> | |
flow.return %237 : tensor<1x384x69x69xf32> | |
} | |
%119 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%118, %cst_65 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32> | |
flow.return %237 : tensor<1x384x65x65xf32> | |
} | |
%120 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%119 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x384xf32> | |
flow.return %237 : tensor<65x65x1x384xf32> | |
} | |
%collapsed_163 = tensor.collapse_shape %120 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32> | |
%expanded_164 = tensor.expand_shape %collapsed_163 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32> | |
%121 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_164 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %237 : tensor<1x65x65x384xf32> | |
} | |
%122 = flow.dispatch.region -> (tensor<1x65x65x64xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%121, %cst_88 : tensor<1x65x65x384xf32>, tensor<1x1x384x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_159, %237, %cst_67 : tensor<1x65x65x64xf32>, tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%99 : tensor<1x65x65x64xi8>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_9 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
linalg.yield %259 : i8 | |
} -> tensor<1x65x65x64xi8> | |
flow.return %238 : tensor<1x65x65x64xi8> | |
} | |
%collapsed_165 = tensor.collapse_shape %122 [[0, 1], [2], [3]] : tensor<1x65x65x64xi8> into tensor<65x65x64xi8> | |
%123 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_165 : tensor<65x65x64xi8>) outs(%96 : tensor<65x65x64xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_9 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x64xf32> | |
%expanded_166 = tensor.expand_shape %123 [[0, 1], [2], [3]] output_shape [1, 65, 65, 64] : tensor<65x65x64xf32> into tensor<1x65x65x64xf32> | |
%124 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_166, %cst_89 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_69 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %238 : tensor<1x65x65x384xf32> | |
} | |
%collapsed_167 = tensor.collapse_shape %124 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32> | |
%inserted_slice_168 = tensor.insert_slice %collapsed_167 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32> | |
%expanded_169 = tensor.expand_shape %inserted_slice_168 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32> | |
%125 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_169 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x384x69x69xf32> | |
flow.return %237 : tensor<1x384x69x69xf32> | |
} | |
%126 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%125, %cst_68 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32> | |
flow.return %237 : tensor<1x384x65x65xf32> | |
} | |
%127 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%126 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x384xf32> | |
flow.return %237 : tensor<65x65x1x384xf32> | |
} | |
%collapsed_170 = tensor.collapse_shape %127 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32> | |
%expanded_171 = tensor.expand_shape %collapsed_170 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32> | |
%128 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_171 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %237 : tensor<1x65x65x384xf32> | |
} | |
%129 = flow.dispatch.region -> (tensor<1x65x65x64xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%128, %cst_90 : tensor<1x65x65x384xf32>, tensor<1x1x384x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_166, %237, %cst_70 : tensor<1x65x65x64xf32>, tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%97 : tensor<1x65x65x64xf32>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_9 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
%260 = arith.extsi %259 : i8 to i32 | |
%261 = arith.sitofp %260 : i32 to f32 | |
%262 = arith.mulf %261, %cst_9 : f32 | |
linalg.yield %262 : f32 | |
} -> tensor<1x65x65x64xf32> | |
flow.return %238 : tensor<1x65x65x64xf32> | |
} | |
%130 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%129, %cst_91 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_32 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %238 : tensor<1x65x65x384xf32> | |
} | |
%collapsed_172 = tensor.collapse_shape %130 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32> | |
%inserted_slice_173 = tensor.insert_slice %collapsed_172 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32> | |
%expanded_174 = tensor.expand_shape %inserted_slice_173 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32> | |
%131 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_174 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x384x69x69xf32> | |
flow.return %237 : tensor<1x384x69x69xf32> | |
} | |
%132 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%131, %cst_31 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32> | |
flow.return %237 : tensor<1x384x65x65xf32> | |
} | |
%133 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%132 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x384xf32> | |
flow.return %237 : tensor<65x65x1x384xf32> | |
} | |
%collapsed_175 = tensor.collapse_shape %133 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32> | |
%expanded_176 = tensor.expand_shape %collapsed_175 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32> | |
%134 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_176 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x384xf32> | |
flow.return %237 : tensor<1x65x65x384xf32> | |
} | |
%135 = tensor.empty() : tensor<65x65x96xf32> | |
%136 = tensor.empty() : tensor<1x65x65x96xf32> | |
%137 = linalg.fill ins(%cst_14 : f32) outs(%136 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32> | |
%138 = tensor.empty() : tensor<1x65x65x96xi8> | |
%139 = flow.dispatch.region -> (tensor<1x65x65x96xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%134, %cst_92 : tensor<1x65x65x384xf32>, tensor<1x1x384x96xf32>) outs(%137 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_33 : tensor<1x65x65x96xf32>, tensor<96xf32>) outs(%138 : tensor<1x65x65x96xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
linalg.yield %247 : i8 | |
} -> tensor<1x65x65x96xi8> | |
flow.return %238 : tensor<1x65x65x96xi8> | |
} | |
%collapsed_177 = tensor.collapse_shape %139 [[0, 1], [2], [3]] : tensor<1x65x65x96xi8> into tensor<65x65x96xi8> | |
%140 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_177 : tensor<65x65x96xi8>) outs(%135 : tensor<65x65x96xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_8 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x96xf32> | |
%expanded_178 = tensor.expand_shape %140 [[0, 1], [2], [3]] output_shape [1, 65, 65, 96] : tensor<65x65x96xf32> into tensor<1x65x65x96xf32> | |
%141 = tensor.empty() : tensor<1x65x65x576xf32> | |
%142 = linalg.fill ins(%cst_14 : f32) outs(%141 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32> | |
%143 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_178, %cst_93 : tensor<1x65x65x96xf32>, tensor<1x1x96x576xf32>) outs(%142 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_19 : tensor<1x65x65x576xf32>, tensor<576xf32>) outs(%141 : tensor<1x65x65x576xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x576xf32> | |
flow.return %238 : tensor<1x65x65x576xf32> | |
} | |
%collapsed_179 = tensor.collapse_shape %143 [[0, 1], [2], [3]] : tensor<1x65x65x576xf32> into tensor<65x65x576xf32> | |
%144 = tensor.empty() : tensor<69x69x576xf32> | |
%145 = linalg.fill ins(%cst_14 : f32) outs(%144 : tensor<69x69x576xf32>) -> tensor<69x69x576xf32> | |
%inserted_slice_180 = tensor.insert_slice %collapsed_179 into %145[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32> | |
%expanded_181 = tensor.expand_shape %inserted_slice_180 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32> | |
%146 = tensor.empty() : tensor<1x576x69x69xf32> | |
%147 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_181 : tensor<69x69x1x576xf32>) outs(%146 : tensor<1x576x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x576x69x69xf32> | |
flow.return %237 : tensor<1x576x69x69xf32> | |
} | |
%148 = tensor.empty() : tensor<1x576x65x65xf32> | |
%149 = linalg.fill ins(%cst_14 : f32) outs(%148 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32> | |
%150 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%147, %cst_34 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%149 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32> | |
flow.return %237 : tensor<1x576x65x65xf32> | |
} | |
%151 = tensor.empty() : tensor<65x65x1x576xf32> | |
%152 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150 : tensor<1x576x65x65xf32>) outs(%151 : tensor<65x65x1x576xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x576xf32> | |
flow.return %237 : tensor<65x65x1x576xf32> | |
} | |
%collapsed_182 = tensor.collapse_shape %152 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32> | |
%expanded_183 = tensor.expand_shape %collapsed_182 [[0, 1], [2], [3]] output_shape [1, 65, 65, 576] : tensor<65x65x576xf32> into tensor<1x65x65x576xf32> | |
%153 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_183 : tensor<1x65x65x576xf32>) outs(%141 : tensor<1x65x65x576xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x576xf32> | |
flow.return %237 : tensor<1x65x65x576xf32> | |
} | |
%154 = flow.dispatch.region -> (tensor<1x65x65x96xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%153, %cst_94 : tensor<1x65x65x576xf32>, tensor<1x1x576x96xf32>) outs(%137 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_178, %237, %cst_35 : tensor<1x65x65x96xf32>, tensor<1x65x65x96xf32>, tensor<96xf32>) outs(%138 : tensor<1x65x65x96xi8>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_8 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
linalg.yield %259 : i8 | |
} -> tensor<1x65x65x96xi8> | |
flow.return %238 : tensor<1x65x65x96xi8> | |
} | |
%collapsed_184 = tensor.collapse_shape %154 [[0, 1], [2], [3]] : tensor<1x65x65x96xi8> into tensor<65x65x96xi8> | |
%155 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_184 : tensor<65x65x96xi8>) outs(%135 : tensor<65x65x96xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_8 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x96xf32> | |
%expanded_185 = tensor.expand_shape %155 [[0, 1], [2], [3]] output_shape [1, 65, 65, 96] : tensor<65x65x96xf32> into tensor<1x65x65x96xf32> | |
%156 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_185, %cst_95 : tensor<1x65x65x96xf32>, tensor<1x1x96x576xf32>) outs(%142 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_37 : tensor<1x65x65x576xf32>, tensor<576xf32>) outs(%141 : tensor<1x65x65x576xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x576xf32> | |
flow.return %238 : tensor<1x65x65x576xf32> | |
} | |
%collapsed_186 = tensor.collapse_shape %156 [[0, 1], [2], [3]] : tensor<1x65x65x576xf32> into tensor<65x65x576xf32> | |
%inserted_slice_187 = tensor.insert_slice %collapsed_186 into %145[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32> | |
%expanded_188 = tensor.expand_shape %inserted_slice_187 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32> | |
%157 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_188 : tensor<69x69x1x576xf32>) outs(%146 : tensor<1x576x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x576x69x69xf32> | |
flow.return %237 : tensor<1x576x69x69xf32> | |
} | |
%158 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%157, %cst_36 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%149 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32> | |
flow.return %237 : tensor<1x576x65x65xf32> | |
} | |
%159 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%158 : tensor<1x576x65x65xf32>) outs(%151 : tensor<65x65x1x576xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x576xf32> | |
flow.return %237 : tensor<65x65x1x576xf32> | |
} | |
%collapsed_189 = tensor.collapse_shape %159 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32> | |
%expanded_190 = tensor.expand_shape %collapsed_189 [[0, 1], [2], [3]] output_shape [1, 65, 65, 576] : tensor<65x65x576xf32> into tensor<1x65x65x576xf32> | |
%160 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_190 : tensor<1x65x65x576xf32>) outs(%141 : tensor<1x65x65x576xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x576xf32> | |
flow.return %237 : tensor<1x65x65x576xf32> | |
} | |
%161 = flow.dispatch.region -> (tensor<1x65x65x96xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%160, %cst_96 : tensor<1x65x65x576xf32>, tensor<1x1x576x96xf32>) outs(%137 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_185, %237, %cst_38 : tensor<1x65x65x96xf32>, tensor<1x65x65x96xf32>, tensor<96xf32>) outs(%136 : tensor<1x65x65x96xf32>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_8 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
%260 = arith.extsi %259 : i8 to i32 | |
%261 = arith.sitofp %260 : i32 to f32 | |
%262 = arith.mulf %261, %cst_8 : f32 | |
linalg.yield %262 : f32 | |
} -> tensor<1x65x65x96xf32> | |
flow.return %238 : tensor<1x65x65x96xf32> | |
} | |
%162 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%161, %cst_97 : tensor<1x65x65x96xf32>, tensor<1x1x96x576xf32>) outs(%142 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_40 : tensor<1x65x65x576xf32>, tensor<576xf32>) outs(%141 : tensor<1x65x65x576xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x576xf32> | |
flow.return %238 : tensor<1x65x65x576xf32> | |
} | |
%collapsed_191 = tensor.collapse_shape %162 [[0, 1], [2], [3]] : tensor<1x65x65x576xf32> into tensor<65x65x576xf32> | |
%inserted_slice_192 = tensor.insert_slice %collapsed_191 into %145[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32> | |
%expanded_193 = tensor.expand_shape %inserted_slice_192 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32> | |
%163 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_193 : tensor<69x69x1x576xf32>) outs(%146 : tensor<1x576x69x69xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x576x69x69xf32> | |
flow.return %237 : tensor<1x576x69x69xf32> | |
} | |
%164 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%163, %cst_39 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%149 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32> | |
flow.return %237 : tensor<1x576x65x65xf32> | |
} | |
%165 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164 : tensor<1x576x65x65xf32>) outs(%151 : tensor<65x65x1x576xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x576xf32> | |
flow.return %237 : tensor<65x65x1x576xf32> | |
} | |
%collapsed_194 = tensor.collapse_shape %165 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32> | |
%expanded_195 = tensor.expand_shape %collapsed_194 [[0, 1], [2], [3]] output_shape [1, 65, 65, 576] : tensor<65x65x576xf32> into tensor<1x65x65x576xf32> | |
%166 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_195 : tensor<1x65x65x576xf32>) outs(%141 : tensor<1x65x65x576xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x576xf32> | |
flow.return %237 : tensor<1x65x65x576xf32> | |
} | |
%167 = tensor.empty() : tensor<65x65x160xf32> | |
%168 = tensor.empty() : tensor<1x65x65x160xf32> | |
%169 = linalg.fill ins(%cst_14 : f32) outs(%168 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32> | |
%170 = tensor.empty() : tensor<1x65x65x160xi8> | |
%171 = flow.dispatch.region -> (tensor<1x65x65x160xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%166, %cst_98 : tensor<1x65x65x576xf32>, tensor<1x1x576x160xf32>) outs(%169 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_41 : tensor<1x65x65x160xf32>, tensor<160xf32>) outs(%170 : tensor<1x65x65x160xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
linalg.yield %247 : i8 | |
} -> tensor<1x65x65x160xi8> | |
flow.return %238 : tensor<1x65x65x160xi8> | |
} | |
%collapsed_196 = tensor.collapse_shape %171 [[0, 1], [2], [3]] : tensor<1x65x65x160xi8> into tensor<65x65x160xi8> | |
%172 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_196 : tensor<65x65x160xi8>) outs(%167 : tensor<65x65x160xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_8 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x160xf32> | |
%expanded_197 = tensor.expand_shape %172 [[0, 1], [2], [3]] output_shape [1, 65, 65, 160] : tensor<65x65x160xf32> into tensor<1x65x65x160xf32> | |
%173 = tensor.empty() : tensor<1x65x65x960xf32> | |
%174 = linalg.fill ins(%cst_14 : f32) outs(%173 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32> | |
%175 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_197, %cst_99 : tensor<1x65x65x160xf32>, tensor<1x1x160x960xf32>) outs(%174 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_20 : tensor<1x65x65x960xf32>, tensor<960xf32>) outs(%173 : tensor<1x65x65x960xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x960xf32> | |
flow.return %238 : tensor<1x65x65x960xf32> | |
} | |
%collapsed_198 = tensor.collapse_shape %175 [[0, 1], [2], [3]] : tensor<1x65x65x960xf32> into tensor<65x65x960xf32> | |
%176 = tensor.empty() : tensor<73x73x960xf32> | |
%177 = linalg.fill ins(%cst_14 : f32) outs(%176 : tensor<73x73x960xf32>) -> tensor<73x73x960xf32> | |
%inserted_slice_199 = tensor.insert_slice %collapsed_198 into %177[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32> | |
%expanded_200 = tensor.expand_shape %inserted_slice_199 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32> | |
%178 = tensor.empty() : tensor<1x960x73x73xf32> | |
%179 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_200 : tensor<73x73x1x960xf32>) outs(%178 : tensor<1x960x73x73xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x960x73x73xf32> | |
flow.return %237 : tensor<1x960x73x73xf32> | |
} | |
%180 = tensor.empty() : tensor<1x960x65x65xf32> | |
%181 = linalg.fill ins(%cst_14 : f32) outs(%180 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32> | |
%182 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%179, %cst_42 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%181 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32> | |
flow.return %237 : tensor<1x960x65x65xf32> | |
} | |
%183 = tensor.empty() : tensor<65x65x1x960xf32> | |
%184 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%182 : tensor<1x960x65x65xf32>) outs(%183 : tensor<65x65x1x960xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x960xf32> | |
flow.return %237 : tensor<65x65x1x960xf32> | |
} | |
%collapsed_201 = tensor.collapse_shape %184 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32> | |
%expanded_202 = tensor.expand_shape %collapsed_201 [[0, 1], [2], [3]] output_shape [1, 65, 65, 960] : tensor<65x65x960xf32> into tensor<1x65x65x960xf32> | |
%185 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_202 : tensor<1x65x65x960xf32>) outs(%173 : tensor<1x65x65x960xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x960xf32> | |
flow.return %237 : tensor<1x65x65x960xf32> | |
} | |
%186 = flow.dispatch.region -> (tensor<1x65x65x160xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%185, %cst_100 : tensor<1x65x65x960xf32>, tensor<1x1x960x160xf32>) outs(%169 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_197, %237, %cst_43 : tensor<1x65x65x160xf32>, tensor<1x65x65x160xf32>, tensor<160xf32>) outs(%170 : tensor<1x65x65x160xi8>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_10 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_10 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_8 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
linalg.yield %259 : i8 | |
} -> tensor<1x65x65x160xi8> | |
flow.return %238 : tensor<1x65x65x160xi8> | |
} | |
%collapsed_203 = tensor.collapse_shape %186 [[0, 1], [2], [3]] : tensor<1x65x65x160xi8> into tensor<65x65x160xi8> | |
%187 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_203 : tensor<65x65x160xi8>) outs(%167 : tensor<65x65x160xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_8 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x160xf32> | |
%expanded_204 = tensor.expand_shape %187 [[0, 1], [2], [3]] output_shape [1, 65, 65, 160] : tensor<65x65x160xf32> into tensor<1x65x65x160xf32> | |
%188 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_204, %cst_101 : tensor<1x65x65x160xf32>, tensor<1x1x160x960xf32>) outs(%174 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_45 : tensor<1x65x65x960xf32>, tensor<960xf32>) outs(%173 : tensor<1x65x65x960xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x960xf32> | |
flow.return %238 : tensor<1x65x65x960xf32> | |
} | |
%collapsed_205 = tensor.collapse_shape %188 [[0, 1], [2], [3]] : tensor<1x65x65x960xf32> into tensor<65x65x960xf32> | |
%inserted_slice_206 = tensor.insert_slice %collapsed_205 into %177[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32> | |
%expanded_207 = tensor.expand_shape %inserted_slice_206 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32> | |
%189 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_207 : tensor<73x73x1x960xf32>) outs(%178 : tensor<1x960x73x73xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x960x73x73xf32> | |
flow.return %237 : tensor<1x960x73x73xf32> | |
} | |
%190 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%189, %cst_44 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%181 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32> | |
flow.return %237 : tensor<1x960x65x65xf32> | |
} | |
%191 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%190 : tensor<1x960x65x65xf32>) outs(%183 : tensor<65x65x1x960xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x960xf32> | |
flow.return %237 : tensor<65x65x1x960xf32> | |
} | |
%collapsed_208 = tensor.collapse_shape %191 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32> | |
%expanded_209 = tensor.expand_shape %collapsed_208 [[0, 1], [2], [3]] output_shape [1, 65, 65, 960] : tensor<65x65x960xf32> into tensor<1x65x65x960xf32> | |
%192 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_209 : tensor<1x65x65x960xf32>) outs(%173 : tensor<1x65x65x960xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x960xf32> | |
flow.return %237 : tensor<1x65x65x960xf32> | |
} | |
%193 = flow.dispatch.region -> (tensor<1x65x65x160xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%192, %cst_102 : tensor<1x65x65x960xf32>, tensor<1x1x960x160xf32>) outs(%169 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_204, %237, %cst_46 : tensor<1x65x65x160xf32>, tensor<1x65x65x160xf32>, tensor<160xf32>) outs(%168 : tensor<1x65x65x160xf32>) { | |
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32): | |
%239 = arith.addf %in_229, %in_230 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
%251 = arith.addf %in, %250 : f32 | |
%252 = arith.divf %251, %cst_8 : f32 | |
%253 = math.round %252 : f32 | |
%254 = arith.addf %253, %cst_14 : f32 | |
%255 = arith.cmpf ult, %254, %cst_16 : f32 | |
%256 = arith.cmpf ugt, %254, %cst_15 : f32 | |
%257 = arith.select %255, %cst_16, %254 : f32 | |
%258 = arith.select %256, %cst_15, %257 : f32 | |
%259 = arith.fptosi %258 : f32 to i8 | |
%260 = arith.extsi %259 : i8 to i32 | |
%261 = arith.sitofp %260 : i32 to f32 | |
%262 = arith.mulf %261, %cst_8 : f32 | |
linalg.yield %262 : f32 | |
} -> tensor<1x65x65x160xf32> | |
flow.return %238 : tensor<1x65x65x160xf32> | |
} | |
%194 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%193, %cst_103 : tensor<1x65x65x160xf32>, tensor<1x1x160x960xf32>) outs(%174 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_48 : tensor<1x65x65x960xf32>, tensor<960xf32>) outs(%173 : tensor<1x65x65x960xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ult, %239, %cst_14 : f32 | |
%241 = arith.select %240, %cst_14, %239 : f32 | |
%242 = arith.cmpf ugt, %241, %cst_2 : f32 | |
%243 = arith.select %242, %cst_2, %241 : f32 | |
%244 = arith.divf %243, %cst_10 : f32 | |
%245 = math.round %244 : f32 | |
%246 = arith.addf %245, %cst_14 : f32 | |
%247 = arith.cmpf ult, %246, %cst_16 : f32 | |
%248 = arith.cmpf ugt, %246, %cst_15 : f32 | |
%249 = arith.select %247, %cst_16, %246 : f32 | |
%250 = arith.select %248, %cst_15, %249 : f32 | |
%251 = arith.fptosi %250 : f32 to i8 | |
%252 = arith.extsi %251 : i8 to i32 | |
%253 = arith.sitofp %252 : i32 to f32 | |
%254 = arith.mulf %253, %cst_10 : f32 | |
linalg.yield %254 : f32 | |
} -> tensor<1x65x65x960xf32> | |
flow.return %238 : tensor<1x65x65x960xf32> | |
} | |
%collapsed_210 = tensor.collapse_shape %194 [[0, 1], [2], [3]] : tensor<1x65x65x960xf32> into tensor<65x65x960xf32> | |
%inserted_slice_211 = tensor.insert_slice %collapsed_210 into %177[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32> | |
%expanded_212 = tensor.expand_shape %inserted_slice_211 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32> | |
%195 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_212 : tensor<73x73x1x960xf32>) outs(%178 : tensor<1x960x73x73xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<1x960x73x73xf32> | |
flow.return %237 : tensor<1x960x73x73xf32> | |
} | |
%196 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) { | |
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%195, %cst_47 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%181 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32> | |
flow.return %237 : tensor<1x960x65x65xf32> | |
} | |
%197 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%196 : tensor<1x960x65x65xf32>) outs(%183 : tensor<65x65x1x960xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.cmpf ult, %in, %cst_14 : f32 | |
%239 = arith.select %238, %cst_14, %in : f32 | |
%240 = arith.cmpf ugt, %239, %cst_2 : f32 | |
%241 = arith.select %240, %cst_2, %239 : f32 | |
linalg.yield %241 : f32 | |
} -> tensor<65x65x1x960xf32> | |
flow.return %237 : tensor<65x65x1x960xf32> | |
} | |
%collapsed_213 = tensor.collapse_shape %197 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32> | |
%expanded_214 = tensor.expand_shape %collapsed_213 [[0, 1], [2], [3]] output_shape [1, 65, 65, 960] : tensor<65x65x960xf32> into tensor<1x65x65x960xf32> | |
%198 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_214 : tensor<1x65x65x960xf32>) outs(%173 : tensor<1x65x65x960xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_10 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_10 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x960xf32> | |
flow.return %237 : tensor<1x65x65x960xf32> | |
} | |
%199 = tensor.empty() : tensor<65x65x320xf32> | |
%200 = tensor.empty() : tensor<1x65x65x320xf32> | |
%201 = linalg.fill ins(%cst_14 : f32) outs(%200 : tensor<1x65x65x320xf32>) -> tensor<1x65x65x320xf32> | |
%202 = tensor.empty() : tensor<1x65x65x320xi8> | |
%203 = flow.dispatch.region -> (tensor<1x65x65x320xi8>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%198, %cst_104 : tensor<1x65x65x960xf32>, tensor<1x1x960x320xf32>) outs(%201 : tensor<1x65x65x320xf32>) -> tensor<1x65x65x320xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_49 : tensor<1x65x65x320xf32>, tensor<320xf32>) outs(%202 : tensor<1x65x65x320xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
linalg.yield %247 : i8 | |
} -> tensor<1x65x65x320xi8> | |
flow.return %238 : tensor<1x65x65x320xi8> | |
} | |
%collapsed_215 = tensor.collapse_shape %203 [[0, 1], [2], [3]] : tensor<1x65x65x320xi8> into tensor<65x65x320xi8> | |
%204 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_215 : tensor<65x65x320xi8>) outs(%199 : tensor<65x65x320xf32>) { | |
^bb0(%in: i8, %out: f32): | |
%237 = arith.extsi %in : i8 to i32 | |
%238 = arith.sitofp %237 : i32 to f32 | |
%239 = arith.mulf %238, %cst_8 : f32 | |
linalg.yield %239 : f32 | |
} -> tensor<65x65x320xf32> | |
%expanded_216 = tensor.expand_shape %204 [[0, 1], [2], [3]] output_shape [1, 65, 65, 320] : tensor<65x65x320xf32> into tensor<1x65x65x320xf32> | |
%205 = tensor.empty() : tensor<320x65x65xf32> | |
%206 = flow.dispatch.region -> (tensor<320x65x65xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%204 : tensor<65x65x320xf32>) outs(%205 : tensor<320x65x65xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<320x65x65xf32> | |
flow.return %237 : tensor<320x65x65xf32> | |
} | |
%expanded_217 = tensor.expand_shape %206 [[0, 1], [2], [3]] output_shape [1, 320, 65, 65] : tensor<320x65x65xf32> into tensor<1x320x65x65xf32> | |
%207 = tensor.empty() : tensor<1x320x1x1xf32> | |
%208 = linalg.fill ins(%cst_14 : f32) outs(%207 : tensor<1x320x1x1xf32>) -> tensor<1x320x1x1xf32> | |
%209 = tensor.empty() : tensor<65x65xf32> | |
%210 = flow.dispatch.region -> (tensor<1x320x1x1xf32>) { | |
%237 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<65> : vector<2xi64>} ins(%expanded_217, %209 : tensor<1x320x65x65xf32>, tensor<65x65xf32>) outs(%208 : tensor<1x320x1x1xf32>) -> tensor<1x320x1x1xf32> | |
flow.return %237 : tensor<1x320x1x1xf32> | |
} | |
%collapsed_218 = tensor.collapse_shape %210 [[0, 1, 2, 3]] : tensor<1x320x1x1xf32> into tensor<320xf32> | |
%211 = tensor.empty() : tensor<1x65x65x256xf32> | |
%212 = linalg.fill ins(%cst_14 : f32) outs(%211 : tensor<1x65x65x256xf32>) -> tensor<1x65x65x256xf32> | |
%expanded_219 = tensor.expand_shape %collapsed_218 [[0, 1, 2, 3]] output_shape [1, 1, 1, 320] : tensor<320xf32> into tensor<1x1x1x320xf32> | |
%213 = tensor.empty() : tensor<1x1x1x320xf32> | |
%214 = flow.dispatch.region -> (tensor<1x1x1x320xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_219 : tensor<1x1x1x320xf32>) outs(%213 : tensor<1x1x1x320xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_17 : f32 | |
%239 = arith.mulf %238, %cst_1 : f32 | |
%240 = arith.divf %239, %cst_8 : f32 | |
%241 = math.round %240 : f32 | |
%242 = arith.addf %241, %cst_14 : f32 | |
%243 = arith.cmpf ult, %242, %cst_16 : f32 | |
%244 = arith.cmpf ugt, %242, %cst_15 : f32 | |
%245 = arith.select %243, %cst_16, %242 : f32 | |
%246 = arith.select %244, %cst_15, %245 : f32 | |
%247 = arith.fptosi %246 : f32 to i8 | |
%248 = arith.extsi %247 : i8 to i32 | |
%249 = arith.sitofp %248 : i32 to f32 | |
%250 = arith.mulf %249, %cst_8 : f32 | |
linalg.yield %250 : f32 | |
} -> tensor<1x1x1x320xf32> | |
flow.return %237 : tensor<1x1x1x320xf32> | |
} | |
%215 = tensor.empty() : tensor<256x65x65xf32> | |
%216 = tensor.empty() : tensor<256x1x65x65xf32> | |
%217 = flow.dispatch.region -> (tensor<256x1x65x65xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_216, %cst_105 : tensor<1x65x65x320xf32>, tensor<1x1x320x256xf32>) outs(%212 : tensor<1x65x65x256xf32>) -> tensor<1x65x65x256xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_24 : tensor<1x65x65x256xf32>, tensor<256xf32>) outs(%216 : tensor<256x1x65x65xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ugt, %239, %cst_14 : f32 | |
%241 = arith.select %240, %239, %cst_14 : f32 | |
%242 = arith.divf %241, %cst_11 : f32 | |
%243 = math.round %242 : f32 | |
%244 = arith.addf %243, %cst_14 : f32 | |
%245 = arith.cmpf ult, %244, %cst_16 : f32 | |
%246 = arith.cmpf ugt, %244, %cst_15 : f32 | |
%247 = arith.select %245, %cst_16, %244 : f32 | |
%248 = arith.select %246, %cst_15, %247 : f32 | |
%249 = arith.fptosi %248 : f32 to i8 | |
%250 = arith.extsi %249 : i8 to i32 | |
%251 = arith.sitofp %250 : i32 to f32 | |
%252 = arith.mulf %251, %cst_11 : f32 | |
linalg.yield %252 : f32 | |
} -> tensor<256x1x65x65xf32> | |
flow.return %238 : tensor<256x1x65x65xf32> | |
} | |
%collapsed_220 = tensor.collapse_shape %217 [[0], [1, 2], [3]] : tensor<256x1x65x65xf32> into tensor<256x65x65xf32> | |
%218 = tensor.empty() : tensor<1x1x1x256xf32> | |
%219 = linalg.fill ins(%cst_14 : f32) outs(%218 : tensor<1x1x1x256xf32>) -> tensor<1x1x1x256xf32> | |
%220 = flow.dispatch.region -> (tensor<1x1x1x256xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%214, %cst_106 : tensor<1x1x1x320xf32>, tensor<1x1x320x256xf32>) outs(%219 : tensor<1x1x1x256xf32>) -> tensor<1x1x1x256xf32> | |
flow.return %237 : tensor<1x1x1x256xf32> | |
} | |
%collapsed_221 = tensor.collapse_shape %220 [[0, 1, 2, 3]] : tensor<1x1x1x256xf32> into tensor<256xf32> | |
%expanded_222 = tensor.expand_shape %collapsed_221 [[0, 1, 2, 3]] output_shape [1, 256, 1, 1] : tensor<256xf32> into tensor<1x256x1x1xf32> | |
%221 = tensor.empty() : tensor<1x256x1x1xf32> | |
%222 = flow.dispatch.region -> (tensor<1x256x1x1xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_222, %cst_0 : tensor<1x256x1x1xf32>, tensor<1x256x1x1xf32>) outs(%221 : tensor<1x256x1x1xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%238 = arith.addf %in, %in_229 : f32 | |
%239 = arith.cmpf ugt, %238, %cst_14 : f32 | |
%240 = arith.select %239, %238, %cst_14 : f32 | |
%241 = arith.divf %240, %cst_11 : f32 | |
%242 = math.round %241 : f32 | |
%243 = arith.addf %242, %cst_14 : f32 | |
%244 = arith.cmpf ult, %243, %cst_16 : f32 | |
%245 = arith.cmpf ugt, %243, %cst_15 : f32 | |
%246 = arith.select %244, %cst_16, %243 : f32 | |
%247 = arith.select %245, %cst_15, %246 : f32 | |
%248 = arith.fptosi %247 : f32 to i8 | |
%249 = arith.extsi %248 : i8 to i32 | |
%250 = arith.sitofp %249 : i32 to f32 | |
%251 = arith.mulf %250, %cst_11 : f32 | |
linalg.yield %251 : f32 | |
} -> tensor<1x256x1x1xf32> | |
flow.return %237 : tensor<1x256x1x1xf32> | |
} | |
%223 = flow.dispatch.region -> (tensor<256x65x65xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} outs(%215 : tensor<256x65x65xf32>) { | |
^bb0(%out: f32): | |
%238 = linalg.index 0 : index | |
%239 = linalg.index 1 : index | |
%240 = linalg.index 2 : index | |
%241 = arith.index_cast %239 : index to i64 | |
%242 = arith.sitofp %241 : i64 to f32 | |
%243 = arith.addf %242, %cst_12 : f32 | |
%244 = arith.divf %243, %cst_6 : f32 | |
%245 = arith.subf %244, %cst_12 : f32 | |
%246 = arith.maximumf %245, %cst_14 : f32 | |
%247 = arith.minimumf %246, %cst_4 : f32 | |
%248 = arith.minimumf %246, %cst_14 : f32 | |
%249 = math.floor %247 : f32 | |
%250 = arith.addf %247, %cst_13 : f32 | |
%251 = math.floor %250 : f32 | |
%252 = arith.fptosi %249 : f32 to i64 | |
%253 = arith.index_cast %252 : i64 to index | |
%254 = arith.fptosi %251 : f32 to i64 | |
%255 = arith.index_cast %254 : i64 to index | |
%256 = arith.index_cast %240 : index to i64 | |
%257 = arith.sitofp %256 : i64 to f32 | |
%258 = arith.addf %257, %cst_12 : f32 | |
%259 = arith.divf %258, %cst_6 : f32 | |
%260 = arith.subf %259, %cst_12 : f32 | |
%261 = arith.maximumf %260, %cst_14 : f32 | |
%262 = arith.minimumf %261, %cst_4 : f32 | |
%263 = arith.minimumf %261, %cst_14 : f32 | |
%264 = math.floor %262 : f32 | |
%265 = arith.addf %262, %cst_13 : f32 | |
%266 = math.floor %265 : f32 | |
%267 = arith.fptosi %264 : f32 to i64 | |
%268 = arith.index_cast %267 : i64 to index | |
%269 = arith.fptosi %266 : f32 to i64 | |
%270 = arith.index_cast %269 : i64 to index | |
%extracted = tensor.extract %222[%c0, %238, %253, %268] : tensor<1x256x1x1xf32> | |
%extracted_229 = tensor.extract %222[%c0, %238, %253, %270] : tensor<1x256x1x1xf32> | |
%extracted_230 = tensor.extract %222[%c0, %238, %255, %268] : tensor<1x256x1x1xf32> | |
%extracted_231 = tensor.extract %222[%c0, %238, %255, %270] : tensor<1x256x1x1xf32> | |
%271 = arith.subf %251, %248 : f32 | |
%272 = arith.subf %248, %249 : f32 | |
%273 = arith.subf %266, %263 : f32 | |
%274 = arith.subf %263, %264 : f32 | |
%275 = arith.mulf %273, %extracted : f32 | |
%276 = arith.mulf %274, %extracted_229 : f32 | |
%277 = arith.addf %275, %276 : f32 | |
%278 = arith.mulf %271, %277 : f32 | |
%279 = arith.mulf %273, %extracted_230 : f32 | |
%280 = arith.mulf %274, %extracted_231 : f32 | |
%281 = arith.addf %279, %280 : f32 | |
%282 = arith.mulf %272, %281 : f32 | |
%283 = arith.addf %278, %282 : f32 | |
%284 = arith.divf %283, %cst_11 : f32 | |
%285 = math.round %284 : f32 | |
%286 = arith.addf %285, %cst_14 : f32 | |
%287 = arith.cmpf ult, %286, %cst_16 : f32 | |
%288 = arith.cmpf ugt, %286, %cst_15 : f32 | |
%289 = arith.select %287, %cst_16, %286 : f32 | |
%290 = arith.select %288, %cst_15, %289 : f32 | |
%291 = arith.fptosi %290 : f32 to i8 | |
%292 = arith.extsi %291 : i8 to i32 | |
%293 = arith.sitofp %292 : i32 to f32 | |
%294 = arith.mulf %293, %cst_11 : f32 | |
linalg.yield %294 : f32 | |
} -> tensor<256x65x65xf32> | |
flow.return %237 : tensor<256x65x65xf32> | |
} | |
%224 = tensor.empty() : tensor<1x512x65x65xf32> | |
%inserted_slice_223 = tensor.insert_slice %223 into %224[0, 0, 0, 0] [1, 256, 65, 65] [1, 1, 1, 1] : tensor<256x65x65xf32> into tensor<1x512x65x65xf32> | |
%inserted_slice_224 = tensor.insert_slice %collapsed_220 into %inserted_slice_223[0, 256, 0, 0] [1, 256, 65, 65] [1, 1, 1, 1] : tensor<256x65x65xf32> into tensor<1x512x65x65xf32> | |
%collapsed_225 = tensor.collapse_shape %inserted_slice_224 [[0, 1], [2], [3]] : tensor<1x512x65x65xf32> into tensor<512x65x65xf32> | |
%expanded_226 = tensor.expand_shape %collapsed_225 [[0], [1, 2], [3]] output_shape [512, 1, 65, 65] : tensor<512x65x65xf32> into tensor<512x1x65x65xf32> | |
%225 = tensor.empty() : tensor<1x65x65x512xf32> | |
%226 = flow.dispatch.region -> (tensor<1x65x65x512xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_226 : tensor<512x1x65x65xf32>) outs(%225 : tensor<1x65x65x512xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%238 = arith.divf %in, %cst_11 : f32 | |
%239 = math.round %238 : f32 | |
%240 = arith.addf %239, %cst_14 : f32 | |
%241 = arith.cmpf ult, %240, %cst_16 : f32 | |
%242 = arith.cmpf ugt, %240, %cst_15 : f32 | |
%243 = arith.select %241, %cst_16, %240 : f32 | |
%244 = arith.select %242, %cst_15, %243 : f32 | |
%245 = arith.fptosi %244 : f32 to i8 | |
%246 = arith.extsi %245 : i8 to i32 | |
%247 = arith.sitofp %246 : i32 to f32 | |
%248 = arith.mulf %247, %cst_11 : f32 | |
linalg.yield %248 : f32 | |
} -> tensor<1x65x65x512xf32> | |
flow.return %237 : tensor<1x65x65x512xf32> | |
} | |
%227 = flow.dispatch.region -> (tensor<1x65x65x256xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%226, %cst_107 : tensor<1x65x65x512xf32>, tensor<1x1x512x256xf32>) outs(%212 : tensor<1x65x65x256xf32>) -> tensor<1x65x65x256xf32> | |
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_71 : tensor<1x65x65x256xf32>, tensor<256xf32>) outs(%211 : tensor<1x65x65x256xf32>) { | |
^bb0(%in: f32, %in_229: f32, %out: f32): | |
%239 = arith.addf %in, %in_229 : f32 | |
%240 = arith.cmpf ugt, %239, %cst_14 : f32 | |
%241 = arith.select %240, %239, %cst_14 : f32 | |
%242 = arith.divf %241, %cst_10 : f32 | |
%243 = math.round %242 : f32 | |
%244 = arith.addf %243, %cst_14 : f32 | |
%245 = arith.cmpf ult, %244, %cst_16 : f32 | |
%246 = arith.cmpf ugt, %244, %cst_15 : f32 | |
%247 = arith.select %245, %cst_16, %244 : f32 | |
%248 = arith.select %246, %cst_15, %247 : f32 | |
%249 = arith.fptosi %248 : f32 to i8 | |
%250 = arith.extsi %249 : i8 to i32 | |
%251 = arith.sitofp %250 : i32 to f32 | |
%252 = arith.mulf %251, %cst_10 : f32 | |
linalg.yield %252 : f32 | |
} -> tensor<1x65x65x256xf32> | |
flow.return %238 : tensor<1x65x65x256xf32> | |
} | |
%228 = tensor.empty() : tensor<1x65x65x21xf32> | |
%229 = linalg.fill ins(%cst_14 : f32) outs(%228 : tensor<1x65x65x21xf32>) -> tensor<1x65x65x21xf32> | |
%230 = flow.dispatch.region -> (tensor<1x65x65x21xf32>) { | |
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%227, %cst_108 : tensor<1x65x65x256xf32>, tensor<1x1x256x21xf32>) outs(%229 : tensor<1x65x65x21xf32>) -> tensor<1x65x65x21xf32> | |
flow.return %237 : tensor<1x65x65x21xf32> | |
} | |
%collapsed_227 = tensor.collapse_shape %230 [[0, 1], [2], [3]] : tensor<1x65x65x21xf32> into tensor<65x65x21xf32> | |
%expanded_228 = tensor.expand_shape %collapsed_227 [[0], [1], [2, 3]] output_shape [65, 65, 1, 21] : tensor<65x65x21xf32> into tensor<65x65x1x21xf32> | |
%231 = tensor.empty() : tensor<1x21x65x65xi8> | |
%232 = flow.dispatch.region -> (tensor<1x21x65x65xi8>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_228, %cst : tensor<65x65x1x21xf32>, tensor<1x21xf32>) outs(%231 : tensor<1x21x65x65xi8>) { | |
^bb0(%in: f32, %in_229: f32, %out: i8): | |
%238 = arith.addf %in, %in_229 : f32 | |
%239 = arith.divf %238, %cst_9 : f32 | |
%240 = math.round %239 : f32 | |
%241 = arith.addf %240, %cst_14 : f32 | |
%242 = arith.cmpf ult, %241, %cst_16 : f32 | |
%243 = arith.cmpf ugt, %241, %cst_15 : f32 | |
%244 = arith.select %242, %cst_16, %241 : f32 | |
%245 = arith.select %243, %cst_15, %244 : f32 | |
%246 = arith.fptosi %245 : f32 to i8 | |
linalg.yield %246 : i8 | |
} -> tensor<1x21x65x65xi8> | |
flow.return %237 : tensor<1x21x65x65xi8> | |
} | |
%233 = tensor.empty() : tensor<1x513x513x21xf32> | |
%234 = flow.dispatch.region -> (tensor<1x513x513x21xf32>) { | |
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%233 : tensor<1x513x513x21xf32>) { | |
^bb0(%out: f32): | |
%238 = linalg.index 0 : index | |
%239 = linalg.index 1 : index | |
%240 = linalg.index 2 : index | |
%241 = linalg.index 3 : index | |
%242 = affine.apply affine_map<(d0, d1) -> (d0 + d1 * 513)>(%239, %238) | |
%243 = arith.index_cast %242 : index to i64 | |
%244 = arith.sitofp %243 : i64 to f32 | |
%245 = arith.addf %244, %cst_12 : f32 | |
%246 = arith.divf %245, %cst_5 : f32 | |
%247 = arith.subf %246, %cst_12 : f32 | |
%248 = arith.maximumf %247, %cst_14 : f32 | |
%249 = arith.minimumf %248, %cst_3 : f32 | |
%250 = math.floor %249 : f32 | |
%251 = arith.addf %249, %cst_13 : f32 | |
%252 = math.floor %251 : f32 | |
%253 = arith.fptosi %250 : f32 to i64 | |
%254 = arith.index_cast %253 : i64 to index | |
%255 = arith.fptosi %252 : f32 to i64 | |
%256 = arith.index_cast %255 : i64 to index | |
%257 = arith.index_cast %240 : index to i64 | |
%258 = arith.sitofp %257 : i64 to f32 | |
%259 = arith.addf %258, %cst_12 : f32 | |
%260 = arith.divf %259, %cst_5 : f32 | |
%261 = arith.subf %260, %cst_12 : f32 | |
%262 = arith.maximumf %261, %cst_14 : f32 | |
%263 = arith.minimumf %262, %cst_3 : f32 | |
%264 = math.floor %263 : f32 | |
%265 = arith.addf %263, %cst_13 : f32 | |
%266 = math.floor %265 : f32 | |
%267 = arith.fptosi %264 : f32 to i64 | |
%268 = arith.index_cast %267 : i64 to index | |
%269 = arith.fptosi %266 : f32 to i64 | |
%270 = arith.index_cast %269 : i64 to index | |
%extracted = tensor.extract %232[%c0, %241, %254, %268] : tensor<1x21x65x65xi8> | |
%271 = arith.extsi %extracted : i8 to i32 | |
%272 = arith.sitofp %271 : i32 to f32 | |
%273 = arith.mulf %272, %cst_9 : f32 | |
%extracted_229 = tensor.extract %232[%c0, %241, %254, %270] : tensor<1x21x65x65xi8> | |
%274 = arith.extsi %extracted_229 : i8 to i32 | |
%275 = arith.sitofp %274 : i32 to f32 | |
%276 = arith.mulf %275, %cst_9 : f32 | |
%extracted_230 = tensor.extract %232[%c0, %241, %256, %268] : tensor<1x21x65x65xi8> | |
%277 = arith.extsi %extracted_230 : i8 to i32 | |
%278 = arith.sitofp %277 : i32 to f32 | |
%279 = arith.mulf %278, %cst_9 : f32 | |
%extracted_231 = tensor.extract %232[%c0, %241, %256, %270] : tensor<1x21x65x65xi8> | |
%280 = arith.extsi %extracted_231 : i8 to i32 | |
%281 = arith.sitofp %280 : i32 to f32 | |
%282 = arith.mulf %281, %cst_9 : f32 | |
%283 = arith.subf %252, %249 : f32 | |
%284 = arith.subf %249, %250 : f32 | |
%285 = arith.subf %266, %263 : f32 | |
%286 = arith.subf %263, %264 : f32 | |
%287 = arith.mulf %285, %273 : f32 | |
%288 = arith.mulf %286, %276 : f32 | |
%289 = arith.addf %287, %288 : f32 | |
%290 = arith.mulf %283, %289 : f32 | |
%291 = arith.mulf %285, %279 : f32 | |
%292 = arith.mulf %286, %282 : f32 | |
%293 = arith.addf %291, %292 : f32 | |
%294 = arith.mulf %284, %293 : f32 | |
%295 = arith.addf %290, %294 : f32 | |
%296 = arith.divf %295, %cst_9 : f32 | |
%297 = math.round %296 : f32 | |
%298 = arith.addf %297, %cst_14 : f32 | |
%299 = arith.cmpf ult, %298, %cst_16 : f32 | |
%300 = arith.cmpf ugt, %298, %cst_15 : f32 | |
%301 = arith.select %299, %cst_16, %298 : f32 | |
%302 = arith.select %300, %cst_15, %301 : f32 | |
%303 = arith.fptosi %302 : f32 to i8 | |
%304 = arith.extsi %303 : i8 to i32 | |
%305 = arith.sitofp %304 : i32 to f32 | |
%306 = arith.mulf %305, %cst_9 : f32 | |
%307 = arith.divf %306, %cst_9 : f32 | |
%308 = math.round %307 : f32 | |
%309 = arith.addf %308, %cst_14 : f32 | |
%310 = arith.cmpf ult, %309, %cst_16 : f32 | |
%311 = arith.cmpf ugt, %309, %cst_15 : f32 | |
%312 = arith.select %310, %cst_16, %309 : f32 | |
%313 = arith.select %311, %cst_15, %312 : f32 | |
%314 = arith.fptosi %313 : f32 to i8 | |
%315 = arith.extsi %314 : i8 to i32 | |
%316 = arith.sitofp %315 : i32 to f32 | |
%317 = arith.mulf %316, %cst_9 : f32 | |
linalg.yield %317 : f32 | |
} -> tensor<1x513x513x21xf32> | |
flow.return %237 : tensor<1x513x513x21xf32> | |
} | |
%235 = hal.tensor.barrier join(%234 : tensor<1x513x513x21xf32>) => %arg2 : !hal.fence | |
%236 = hal.tensor.export %235 : tensor<1x513x513x21xf32> -> !hal.buffer_view | |
util.return %236 : !hal.buffer_view | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment