@yzhang93
Created July 10, 2024 20:59
deeplabv3 after PadToIntrinsics
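
All of the @jit_eval_* functions below are the same constant-dequantization dispatch: each imports an i8 weight or bias tensor, sign-extends it to i32, converts it to f32, and multiplies by a per-tensor power-of-two scale constant; the 4-D variants additionally transpose the weight layout through the input indexing map. As a rough, illustrative sketch only (the function and variable names here are not part of the dump), the per-element math is:

import numpy as np

def dequantize(values_i8: np.ndarray, scale: float) -> np.ndarray:
    # Mirrors the extsi -> sitofp -> mulf body of each linalg.generic below:
    # sign-extend i8 to i32, convert to f32, then apply the per-tensor scale.
    return values_i8.astype(np.int32).astype(np.float32) * scale

# e.g. @jit_eval_0 rescales a 576-element i8 tensor by 1.5625e-02 (2**-6).
example = dequantize(np.zeros(576, dtype=np.int8), 1.5625e-02)
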
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_0(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_0(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
%1 = tensor.empty() : tensor<576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_5(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_5(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
%1 = tensor.empty() : tensor<256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_1(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_1(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
%1 = tensor.empty() : tensor<960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_9(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_9(%input0: tensor<16xi8>) -> (%output0: tensor<16xf32>)"}} {
%cst = arith.constant 2.500000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16xi8>
%1 = tensor.empty() : tensor<16xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<16xi8>) outs(%1 : tensor<16xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<16xf32>
%3 = hal.tensor.export %2 "output0" : tensor<16xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_2(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_2(%input0: tensor<144xi8>) -> (%output0: tensor<144xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144xi8>
%1 = tensor.empty() : tensor<144xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<144xi8>) outs(%1 : tensor<144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_10(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_10(%input0: tensor<96x3x3xi8>) -> (%output0: tensor<96x3x3xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x3x3xi8>
%1 = tensor.empty() : tensor<96x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<96x3x3xi8>) outs(%1 : tensor<96x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_6(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_6(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_8(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_8(%input0: tensor<32x3x3xi8>) -> (%output0: tensor<32x3x3xf32>)"}} {
%cst = arith.constant 5.000000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x3x3xi8>
%1 = tensor.empty() : tensor<32x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<32x3x3xi8>) outs(%1 : tensor<32x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_4(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_4(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_3(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_3(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
%1 = tensor.empty() : tensor<192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_7(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_7(%input0: tensor<32x3x3x3xi8>) -> (%output0: tensor<3x3x3x32xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x3x3x3xi8>
%1 = tensor.empty() : tensor<3x3x3x32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d2, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x3x3x3xi8>) outs(%1 : tensor<3x3x3x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<3x3x3x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<3x3x3x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_12(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_12(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_13(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_13(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_14(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_14(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_15(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_15(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
%1 = tensor.empty() : tensor<576x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_16(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_16(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_11(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_11(%input0: tensor<24xi8>) -> (%output0: tensor<24xf32>)"}} {
%cst = arith.constant 5.000000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24xi8>
%1 = tensor.empty() : tensor<24xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<24xi8>) outs(%1 : tensor<24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_20(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_20(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
%1 = tensor.empty() : tensor<576x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_24(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_24(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
%1 = tensor.empty() : tensor<160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_17(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_17(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
%1 = tensor.empty() : tensor<576x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_26(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_26(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
%1 = tensor.empty() : tensor<960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_25(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_25(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
%1 = tensor.empty() : tensor<960x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_27(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_27(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
%1 = tensor.empty() : tensor<160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_29(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_29(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
%1 = tensor.empty() : tensor<960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_23(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_23(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
%1 = tensor.empty() : tensor<960x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_18(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_18(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
%1 = tensor.empty() : tensor<576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_19(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_19(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_22(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_22(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
%1 = tensor.empty() : tensor<160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_21(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_21(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
%1 = tensor.empty() : tensor<576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_30(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_30(%input0: tensor<320xi8>) -> (%output0: tensor<320xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<320xi8>
%1 = tensor.empty() : tensor<320xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<320xi8>) outs(%1 : tensor<320xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<320xf32>
%3 = hal.tensor.export %2 "output0" : tensor<320xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_32(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_32(%input0: tensor<24xi8>) -> (%output0: tensor<24xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24xi8>
%1 = tensor.empty() : tensor<24xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<24xi8>) outs(%1 : tensor<24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_28(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_28(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
%1 = tensor.empty() : tensor<960x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_35(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_35(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 2.500000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_33(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_33(%input0: tensor<144x3x3xi8>) -> (%output0: tensor<144x3x3xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x3x3xi8>
%1 = tensor.empty() : tensor<144x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<144x3x3xi8>) outs(%1 : tensor<144x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_34(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_34(%input0: tensor<144xi8>) -> (%output0: tensor<144xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144xi8>
%1 = tensor.empty() : tensor<144xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<144xi8>) outs(%1 : tensor<144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_37(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_37(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_31(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_31(%input0: tensor<144x3x3xi8>) -> (%output0: tensor<144x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x3x3xi8>
%1 = tensor.empty() : tensor<144x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<144x3x3xi8>) outs(%1 : tensor<144x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_42(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_42(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
%1 = tensor.empty() : tensor<192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_40(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_40(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_36(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_36(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8>
%1 = tensor.empty() : tensor<192x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_45(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_45(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_47(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_47(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_48(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_48(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_43(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_43(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 2.500000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_41(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_41(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8>
%1 = tensor.empty() : tensor<192x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_46(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_46(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_49(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_49(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_39(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_39(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
%1 = tensor.empty() : tensor<192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_44(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_44(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_38(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_38(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8>
%1 = tensor.empty() : tensor<192x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_50(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_50(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_51(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_51(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_55(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_55(%input0: tensor<16x1x1x32xi8>) -> (%output0: tensor<1x1x32x16xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x16xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<16x1x1x32xi8>) outs(%1 : tensor<1x1x32x16xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x16xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x16xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_52(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_52(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
%1 = tensor.empty() : tensor<256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_54(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_54(%input0: tensor<21xi8>) -> (%output0: tensor<21xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<21xi8>
%1 = tensor.empty() : tensor<21xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<21xi8>) outs(%1 : tensor<21xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<21xf32>
%3 = hal.tensor.export %2 "output0" : tensor<21xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_56(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_56(%input0: tensor<96x1x1x16xi8>) -> (%output0: tensor<1x1x16x96xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x16xi8>
%1 = tensor.empty() : tensor<1x1x16x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x16xi8>) outs(%1 : tensor<1x1x16x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x16x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x16x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_61(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_61(%input0: tensor<32x1x1x144xi8>) -> (%output0: tensor<1x1x144x32xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x144xi8>
%1 = tensor.empty() : tensor<1x1x144x32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x144xi8>) outs(%1 : tensor<1x1x144x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x144x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x144x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_53(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_53(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
%1 = tensor.empty() : tensor<256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_64(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_64(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_65(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_65(%input0: tensor<32x1x1x192xi8>) -> (%output0: tensor<1x1x192x32xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x192xi8>
%1 = tensor.empty() : tensor<1x1x192x32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x192xi8>) outs(%1 : tensor<1x1x192x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x192x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_66(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_66(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_58(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_58(%input0: tensor<144x1x1x24xi8>) -> (%output0: tensor<1x1x24x144xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x1x1x24xi8>
%1 = tensor.empty() : tensor<1x1x24x144xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<144x1x1x24xi8>) outs(%1 : tensor<1x1x24x144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x24x144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x24x144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_67(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_67(%input0: tensor<64x1x1x192xi8>) -> (%output0: tensor<1x1x192x64xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x192xi8>
%1 = tensor.empty() : tensor<1x1x192x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x192xi8>) outs(%1 : tensor<1x1x192x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x192x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_62(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_62(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_63(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_63(%input0: tensor<32x1x1x192xi8>) -> (%output0: tensor<1x1x192x32xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x192xi8>
%1 = tensor.empty() : tensor<1x1x192x32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x192xi8>) outs(%1 : tensor<1x1x192x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x192x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_57(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_57(%input0: tensor<24x1x1x96xi8>) -> (%output0: tensor<1x1x96x24xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x24xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<24x1x1x96xi8>) outs(%1 : tensor<1x1x96x24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_60(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_60(%input0: tensor<144x1x1x24xi8>) -> (%output0: tensor<1x1x24x144xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x1x1x24xi8>
%1 = tensor.empty() : tensor<1x1x24x144xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<144x1x1x24xi8>) outs(%1 : tensor<1x1x24x144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x24x144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x24x144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_59(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_59(%input0: tensor<24x1x1x144xi8>) -> (%output0: tensor<1x1x144x24xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24x1x1x144xi8>
%1 = tensor.empty() : tensor<1x1x144x24xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<24x1x1x144xi8>) outs(%1 : tensor<1x1x144x24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x144x24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x144x24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_71(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_71(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_68(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_68(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_70(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_70(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_72(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_72(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_73(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_73(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_69(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_69(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_76(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_76(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_79(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_79(%input0: tensor<96x1x1x576xi8>) -> (%output0: tensor<1x1x576x96xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x576xi8>
%1 = tensor.empty() : tensor<1x1x576x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x576xi8>) outs(%1 : tensor<1x1x576x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x576x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_82(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_82(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8>
%1 = tensor.empty() : tensor<1x1x160x960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x160x960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_74(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_74(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_75(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_75(%input0: tensor<96x1x1x384xi8>) -> (%output0: tensor<1x1x384x96xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x384xi8>) outs(%1 : tensor<1x1x384x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_85(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_85(%input0: tensor<160x1x1x960xi8>) -> (%output0: tensor<1x1x960x160xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x960xi8>
%1 = tensor.empty() : tensor<1x1x960x160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x960xi8>) outs(%1 : tensor<1x1x960x160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x960x160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_78(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_78(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_77(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_77(%input0: tensor<96x1x1x576xi8>) -> (%output0: tensor<1x1x576x96xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x576xi8>
%1 = tensor.empty() : tensor<1x1x576x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x576xi8>) outs(%1 : tensor<1x1x576x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x576x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_87(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_87(%input0: tensor<320x1x1x960xi8>) -> (%output0: tensor<1x1x960x320xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<320x1x1x960xi8>
%1 = tensor.empty() : tensor<1x1x960x320xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<320x1x1x960xi8>) outs(%1 : tensor<1x1x960x320xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x960x320xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x320xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_88(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_88(%input0: tensor<256x1x1x320xi8>) -> (%output0: tensor<1x1x320x256xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x320xi8>
%1 = tensor.empty() : tensor<1x1x320x256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x320xi8>) outs(%1 : tensor<1x1x320x256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x320x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x320x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_83(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_83(%input0: tensor<160x1x1x960xi8>) -> (%output0: tensor<1x1x960x160xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x960xi8>
%1 = tensor.empty() : tensor<1x1x960x160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x960xi8>) outs(%1 : tensor<1x1x960x160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x960x160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_84(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_84(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8>
%1 = tensor.empty() : tensor<1x1x160x960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x160x960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_81(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_81(%input0: tensor<160x1x1x576xi8>) -> (%output0: tensor<1x1x576x160xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x576xi8>
%1 = tensor.empty() : tensor<1x1x576x160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x576xi8>) outs(%1 : tensor<1x1x576x160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x576x160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_80(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_80(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_86(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_86(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8>
%1 = tensor.empty() : tensor<1x1x160x960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x160x960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_89(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_89(%input0: tensor<256x1x1x320xi8>) -> (%output0: tensor<1x1x320x256xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x320xi8>
%1 = tensor.empty() : tensor<1x1x320x256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x320xi8>) outs(%1 : tensor<1x1x320x256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x320x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x320x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_90(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_90(%input0: tensor<256x1x1x512xi8>) -> (%output0: tensor<1x1x512x256xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x512xi8>
%1 = tensor.empty() : tensor<1x1x512x256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x512xi8>) outs(%1 : tensor<1x1x512x256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x512x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x512x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_91(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_91(%input0: tensor<21x256xi8>) -> (%output0: tensor<256x24xf32>)"}} {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<21x256xi8>
%1 = tensor.empty() : tensor<256x21xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<21x256xi8>) outs(%1 : tensor<256x21xf32>) {
^bb0(%in: i8, %out: f32):
%6 = arith.extsi %in : i8 to i32
%7 = arith.sitofp %6 : i32 to f32
%8 = arith.mulf %7, %cst_0 : f32
linalg.yield %8 : f32
} -> tensor<256x21xf32>
%3 = tensor.empty() : tensor<256x24xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x24xf32>) -> tensor<256x24xf32>
%inserted_slice = tensor.insert_slice %2 into %4[0, 0] [256, 21] [1, 1] : tensor<256x21xf32> into tensor<256x24xf32>
%5 = hal.tensor.export %inserted_slice "output0" : tensor<256x24xf32> -> !hal.buffer_view
util.return %5 : !hal.buffer_view
}
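// @jit_eval_91 shows padding applied to a constant, presumably by PadToIntrinsics: the 21x256 i8
// weights (21 being the segmentation class count) are transposed and dequantized to 256x21 f32,
// then zero-padded along the class dimension to 256x24 with linalg.fill + tensor.insert_slice. The
// same padded 256x24 shape appears below as %cst_3 (tensor<1x1x256x24xf32>) in @tf2onnx$async.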
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @tf2onnx(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c-1_i32 = arith.constant -1 : i32
%c0 = arith.constant 0 : index
%device_0 = hal.devices.get %c0 : !hal.device
%0 = util.null : !hal.fence
%fence = hal.fence.create device(%device_0 : !hal.device) flags("None") : !hal.fence
%1 = util.call @tf2onnx$async(%arg0, %0, %fence) : (!hal.buffer_view, !hal.fence, !hal.fence) -> !hal.buffer_view
%status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32
util.return %1 : !hal.buffer_view
}
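// @tf2onnx is the synchronous ABI wrapper for the coarse-fences entry point: it creates a signal
// fence on device 0, calls @tf2onnx$async with a null wait fence, and blocks on that signal fence
// (timeout_millis = -1) before returning the exported buffer view.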
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @tf2onnx$async(%arg0: !hal.buffer_view, %arg1: !hal.fence, %arg2: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
%cst = arith.constant dense_resource<__elided__> : tensor<1x21xf32>
%cst_0 = arith.constant dense_resource<__elided__> : tensor<1x256x1x1xf32>
%cst_1 = arith.constant 0.999259948 : f32
%cst_2 = arith.constant 6.000000e+00 : f32
%cst_3 = arith.constant dense_resource<__elided__> : tensor<1x1x256x24xf32>
%cst_4 = arith.constant 6.400000e+01 : f32
%cst_5 = arith.constant -9.53674316E-7 : f32
%cst_6 = arith.constant 7.89230776 : f32
%cst_7 = arith.constant 6.500000e+01 : f32
%cst_8 = arith.constant 7.812500e-03 : f32
%cst_9 = arith.constant 1.250000e-01 : f32
%cst_10 = arith.constant 2.500000e-01 : f32
%cst_11 = arith.constant 6.250000e-02 : f32
%cst_12 = arith.constant 3.125000e-02 : f32
%cst_13 = arith.constant 5.000000e-01 : f32
%cst_14 = arith.constant 1.000000e+00 : f32
%cst_15 = arith.constant 0.000000e+00 : f32
%cst_16 = arith.constant 1.270000e+02 : f32
%cst_17 = arith.constant -1.280000e+02 : f32
%cst_18 = arith.constant 4.225000e+03 : f32
%c0 = arith.constant 0 : index
%cst_19 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_20 = arith.constant dense_resource<__elided__> : tensor<576xf32>
%cst_21 = arith.constant dense_resource<__elided__> : tensor<960xf32>
%cst_22 = arith.constant dense_resource<__elided__> : tensor<144xf32>
%cst_23 = arith.constant dense_resource<__elided__> : tensor<192xf32>
%cst_24 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_25 = arith.constant dense_resource<__elided__> : tensor<256xf32>
%cst_26 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_27 = arith.constant dense_resource<__elided__> : tensor<3x3x3x32xf32>
%cst_28 = arith.constant dense_resource<__elided__> : tensor<32x3x3xf32>
%cst_29 = arith.constant dense_resource<__elided__> : tensor<16xf32>
%cst_30 = arith.constant dense_resource<__elided__> : tensor<96x3x3xf32>
%cst_31 = arith.constant dense_resource<__elided__> : tensor<24xf32>
%cst_32 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_33 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_34 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_35 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32>
%cst_36 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_37 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32>
%cst_38 = arith.constant dense_resource<__elided__> : tensor<576xf32>
%cst_39 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_40 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32>
%cst_41 = arith.constant dense_resource<__elided__> : tensor<576xf32>
%cst_42 = arith.constant dense_resource<__elided__> : tensor<160xf32>
%cst_43 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32>
%cst_44 = arith.constant dense_resource<__elided__> : tensor<160xf32>
%cst_45 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32>
%cst_46 = arith.constant dense_resource<__elided__> : tensor<960xf32>
%cst_47 = arith.constant dense_resource<__elided__> : tensor<160xf32>
%cst_48 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32>
%cst_49 = arith.constant dense_resource<__elided__> : tensor<960xf32>
%cst_50 = arith.constant dense_resource<__elided__> : tensor<320xf32>
%cst_51 = arith.constant dense_resource<__elided__> : tensor<144x3x3xf32>
%cst_52 = arith.constant dense_resource<__elided__> : tensor<24xf32>
%cst_53 = arith.constant dense_resource<__elided__> : tensor<144x3x3xf32>
%cst_54 = arith.constant dense_resource<__elided__> : tensor<144xf32>
%cst_55 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_56 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32>
%cst_57 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_58 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32>
%cst_59 = arith.constant dense_resource<__elided__> : tensor<192xf32>
%cst_60 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_61 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32>
%cst_62 = arith.constant dense_resource<__elided__> : tensor<192xf32>
%cst_63 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_64 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_65 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_66 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_67 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_68 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_69 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_70 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_71 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_72 = arith.constant dense_resource<__elided__> : tensor<256xf32>
%cst_73 = arith.constant dense_resource<__elided__> : tensor<1x1x32x16xf32>
%cst_74 = arith.constant dense_resource<__elided__> : tensor<1x1x16x96xf32>
%cst_75 = arith.constant dense_resource<__elided__> : tensor<1x1x96x24xf32>
%cst_76 = arith.constant dense_resource<__elided__> : tensor<1x1x24x144xf32>
%cst_77 = arith.constant dense_resource<__elided__> : tensor<1x1x144x24xf32>
%cst_78 = arith.constant dense_resource<__elided__> : tensor<1x1x24x144xf32>
%cst_79 = arith.constant dense_resource<__elided__> : tensor<1x1x144x32xf32>
%cst_80 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32>
%cst_81 = arith.constant dense_resource<__elided__> : tensor<1x1x192x32xf32>
%cst_82 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32>
%cst_83 = arith.constant dense_resource<__elided__> : tensor<1x1x192x32xf32>
%cst_84 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32>
%cst_85 = arith.constant dense_resource<__elided__> : tensor<1x1x192x64xf32>
%cst_86 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_87 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32>
%cst_88 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_89 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32>
%cst_90 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_91 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32>
%cst_92 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_93 = arith.constant dense_resource<__elided__> : tensor<1x1x384x96xf32>
%cst_94 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32>
%cst_95 = arith.constant dense_resource<__elided__> : tensor<1x1x576x96xf32>
%cst_96 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32>
%cst_97 = arith.constant dense_resource<__elided__> : tensor<1x1x576x96xf32>
%cst_98 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32>
%cst_99 = arith.constant dense_resource<__elided__> : tensor<1x1x576x160xf32>
%cst_100 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32>
%cst_101 = arith.constant dense_resource<__elided__> : tensor<1x1x960x160xf32>
%cst_102 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32>
%cst_103 = arith.constant dense_resource<__elided__> : tensor<1x1x960x160xf32>
%cst_104 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32>
%cst_105 = arith.constant dense_resource<__elided__> : tensor<1x1x960x320xf32>
%cst_106 = arith.constant dense_resource<__elided__> : tensor<1x1x320x256xf32>
%cst_107 = arith.constant dense_resource<__elided__> : tensor<1x1x320x256xf32>
%cst_108 = arith.constant dense_resource<__elided__> : tensor<1x1x512x256xf32>
%0 = hal.tensor.import wait(%arg1) => %arg0 : !hal.buffer_view -> tensor<1x513x513x3xf32>
%expanded = tensor.expand_shape %0 [[0], [1], [2], [3, 4]] output_shape [1, 513, 513, 1, 3] : tensor<1x513x513x3xf32> into tensor<1x513x513x1x3xf32>
%1 = tensor.empty() : tensor<1x513x513x1x3xf32>
%2 = flow.dispatch.region -> (tensor<1x513x513x1x3xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expanded : tensor<1x513x513x1x3xf32>) outs(%1 : tensor<1x513x513x1x3xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.divf %in, %cst_8 : f32
%298 = math.round %297 : f32
%299 = arith.addf %298, %cst_15 : f32
%300 = arith.cmpf ult, %299, %cst_17 : f32
%301 = arith.cmpf ugt, %299, %cst_16 : f32
%302 = arith.select %300, %cst_17, %299 : f32
%303 = arith.select %301, %cst_16, %302 : f32
%304 = arith.fptosi %303 : f32 to i8
%305 = arith.extsi %304 : i8 to i32
%306 = arith.sitofp %305 : i32 to f32
%307 = arith.mulf %306, %cst_8 : f32
linalg.yield %307 : f32
} -> tensor<1x513x513x1x3xf32>
flow.return %296 : tensor<1x513x513x1x3xf32>
}
%collapsed = tensor.collapse_shape %2 [[0, 1], [2, 3], [4]] : tensor<1x513x513x1x3xf32> into tensor<513x513x3xf32>
%3 = tensor.empty() : tensor<515x515x3xf32>
%4 = linalg.fill ins(%cst_15 : f32) outs(%3 : tensor<515x515x3xf32>) -> tensor<515x515x3xf32>
%inserted_slice = tensor.insert_slice %collapsed into %4[1, 1, 0] [513, 513, 3] [1, 1, 1] : tensor<513x513x3xf32> into tensor<515x515x3xf32>
%expanded_109 = tensor.expand_shape %inserted_slice [[0, 1], [2], [3]] output_shape [1, 515, 515, 3] : tensor<515x515x3xf32> into tensor<1x515x515x3xf32>
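// The dispatch region above (%2) fake-quantizes the imported 1x513x513x3 input: divide by the
// 7.8125e-3 scale (%cst_8), round, clamp to [-128, 127], cast to i8 and straight back to f32 at the
// same scale. The collapsed result is then zero-padded from 513x513 to 515x515 at offset [1, 1, 0],
// which supplies the explicit border for the 3x3 stride-2 convolution that follows.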
%5 = tensor.empty() : tensor<1x257x257x32xf32>
%6 = linalg.fill ins(%cst_15 : f32) outs(%5 : tensor<1x257x257x32xf32>) -> tensor<1x257x257x32xf32>
%7 = flow.dispatch.region -> (tensor<1x257x257x32xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%expanded_109, %cst_27 : tensor<1x515x515x3xf32>, tensor<3x3x3x32xf32>) outs(%6 : tensor<1x257x257x32xf32>) -> tensor<1x257x257x32xf32>
%297 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%296, %cst_26 : tensor<1x257x257x32xf32>, tensor<32xf32>) outs(%5 : tensor<1x257x257x32xf32>) {
^bb0(%in: f32, %in_256: f32, %out: f32):
%298 = arith.addf %in, %in_256 : f32
%299 = arith.cmpf ult, %298, %cst_15 : f32
%300 = arith.select %299, %cst_15, %298 : f32
%301 = arith.cmpf ugt, %300, %cst_2 : f32
%302 = arith.select %301, %cst_2, %300 : f32
%303 = arith.divf %302, %cst_11 : f32
%304 = math.round %303 : f32
%305 = arith.addf %304, %cst_15 : f32
%306 = arith.cmpf ult, %305, %cst_17 : f32
%307 = arith.cmpf ugt, %305, %cst_16 : f32
%308 = arith.select %306, %cst_17, %305 : f32
%309 = arith.select %307, %cst_16, %308 : f32
%310 = arith.fptosi %309 : f32 to i8
%311 = arith.extsi %310 : i8 to i32
%312 = arith.sitofp %311 : i32 to f32
%313 = arith.mulf %312, %cst_11 : f32
linalg.yield %313 : f32
} -> tensor<1x257x257x32xf32>
flow.return %297 : tensor<1x257x257x32xf32>
}
%collapsed_110 = tensor.collapse_shape %7 [[0, 1], [2], [3]] : tensor<1x257x257x32xf32> into tensor<257x257x32xf32>
%8 = tensor.empty() : tensor<259x259x32xf32>
%9 = linalg.fill ins(%cst_15 : f32) outs(%8 : tensor<259x259x32xf32>) -> tensor<259x259x32xf32>
%inserted_slice_111 = tensor.insert_slice %collapsed_110 into %9[1, 1, 0] [257, 257, 32] [1, 1, 1] : tensor<257x257x32xf32> into tensor<259x259x32xf32>
%expanded_112 = tensor.expand_shape %inserted_slice_111 [[0], [1], [2, 3]] output_shape [259, 259, 1, 32] : tensor<259x259x32xf32> into tensor<259x259x1x32xf32>
%10 = tensor.empty() : tensor<1x32x259x259xf32>
%11 = flow.dispatch.region -> (tensor<1x32x259x259xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_112 : tensor<259x259x1x32xf32>) outs(%10 : tensor<1x32x259x259xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x32x259x259xf32>
flow.return %296 : tensor<1x32x259x259xf32>
}
%12 = tensor.empty() : tensor<1x32x257x257xf32>
%13 = linalg.fill ins(%cst_15 : f32) outs(%12 : tensor<1x32x257x257xf32>) -> tensor<1x32x257x257xf32>
%14 = flow.dispatch.region -> (tensor<1x32x257x257xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_28 : tensor<1x32x259x259xf32>, tensor<32x3x3xf32>) outs(%13 : tensor<1x32x257x257xf32>) -> tensor<1x32x257x257xf32>
flow.return %296 : tensor<1x32x257x257xf32>
}
%15 = tensor.empty() : tensor<257x257x1x32xf32>
%16 = flow.dispatch.region -> (tensor<257x257x1x32xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<1x32x257x257xf32>) outs(%15 : tensor<257x257x1x32xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<257x257x1x32xf32>
flow.return %296 : tensor<257x257x1x32xf32>
}
%collapsed_113 = tensor.collapse_shape %16 [[0], [1], [2, 3]] : tensor<257x257x1x32xf32> into tensor<257x257x32xf32>
%17 = tensor.empty() : tensor<257x257x16xf32>
%18 = tensor.empty() : tensor<260x260x32xf32>
%19 = linalg.fill ins(%cst_15 : f32) outs(%18 : tensor<260x260x32xf32>) -> tensor<260x260x32xf32>
%inserted_slice_114 = tensor.insert_slice %collapsed_113 into %19[0, 0, 0] [257, 257, 32] [1, 1, 1] : tensor<257x257x32xf32> into tensor<260x260x32xf32>
%expanded_115 = tensor.expand_shape %inserted_slice_114 [[0, 1], [2], [3]] output_shape [1, 260, 260, 32] : tensor<260x260x32xf32> into tensor<1x260x260x32xf32>
%20 = tensor.empty() : tensor<1x260x260x16xf32>
%21 = linalg.fill ins(%cst_15 : f32) outs(%20 : tensor<1x260x260x16xf32>) -> tensor<1x260x260x16xf32>
%22 = flow.dispatch.region -> (tensor<1x260x260x16xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_115, %cst_73 : tensor<1x260x260x32xf32>, tensor<1x1x32x16xf32>) outs(%21 : tensor<1x260x260x16xf32>) -> tensor<1x260x260x16xf32>
flow.return %296 : tensor<1x260x260x16xf32>
}
%extracted_slice = tensor.extract_slice %22[0, 0, 0, 0] [1, 257, 257, 16] [1, 1, 1, 1] : tensor<1x260x260x16xf32> to tensor<257x257x16xf32>
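// Pad/slice pair around the 1x1 convolution, presumably inserted by PadToIntrinsics: the 257x257x32
// activation is zero-padded to 260x260 (%inserted_slice_114) before the conv, and the 1x260x260x16
// result is sliced back down to 257x257x16 here, so the convolution itself runs on the padded
// spatial shape.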
%23 = tensor.empty() : tensor<257x257x96xf32>
%24 = tensor.empty() : tensor<260x260x16xf32>
%25 = linalg.fill ins(%cst_15 : f32) outs(%24 : tensor<260x260x16xf32>) -> tensor<260x260x16xf32>
%26 = flow.dispatch.region -> (tensor<260x260x16xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice, %cst_29 : tensor<257x257x16xf32>, tensor<16xf32>) outs(%17 : tensor<257x257x16xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_10 : f32
linalg.yield %308 : f32
} -> tensor<257x257x16xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %25[0, 0, 0] [257, 257, 16] [1, 1, 1] : tensor<257x257x16xf32> into tensor<260x260x16xf32>
flow.return %inserted_slice_256 : tensor<260x260x16xf32>
}
%expanded_116 = tensor.expand_shape %26 [[0, 1], [2], [3]] output_shape [1, 260, 260, 16] : tensor<260x260x16xf32> into tensor<1x260x260x16xf32>
%27 = tensor.empty() : tensor<1x260x260x96xf32>
%28 = linalg.fill ins(%cst_15 : f32) outs(%27 : tensor<1x260x260x96xf32>) -> tensor<1x260x260x96xf32>
%29 = flow.dispatch.region -> (tensor<1x260x260x96xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_116, %cst_74 : tensor<1x260x260x16xf32>, tensor<1x1x16x96xf32>) outs(%28 : tensor<1x260x260x96xf32>) -> tensor<1x260x260x96xf32>
flow.return %296 : tensor<1x260x260x96xf32>
}
%extracted_slice_117 = tensor.extract_slice %29[0, 0, 0, 0] [1, 257, 257, 96] [1, 1, 1, 1] : tensor<1x260x260x96xf32> to tensor<257x257x96xf32>
%30 = tensor.empty() : tensor<259x259x96xf32>
%31 = linalg.fill ins(%cst_15 : f32) outs(%30 : tensor<259x259x96xf32>) -> tensor<259x259x96xf32>
%32 = flow.dispatch.region -> (tensor<259x259x96xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_117, %cst_19 : tensor<257x257x96xf32>, tensor<96xf32>) outs(%23 : tensor<257x257x96xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<257x257x96xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %31[1, 1, 0] [257, 257, 96] [1, 1, 1] : tensor<257x257x96xf32> into tensor<259x259x96xf32>
flow.return %inserted_slice_256 : tensor<259x259x96xf32>
}
%expanded_118 = tensor.expand_shape %32 [[0], [1], [2, 3]] output_shape [259, 259, 1, 96] : tensor<259x259x96xf32> into tensor<259x259x1x96xf32>
%33 = tensor.empty() : tensor<1x96x259x259xf32>
%34 = flow.dispatch.region -> (tensor<1x96x259x259xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_118 : tensor<259x259x1x96xf32>) outs(%33 : tensor<1x96x259x259xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x96x259x259xf32>
flow.return %296 : tensor<1x96x259x259xf32>
}
%35 = tensor.empty() : tensor<1x96x129x129xf32>
%36 = linalg.fill ins(%cst_15 : f32) outs(%35 : tensor<1x96x129x129xf32>) -> tensor<1x96x129x129xf32>
%37 = flow.dispatch.region -> (tensor<1x96x129x129xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%34, %cst_30 : tensor<1x96x259x259xf32>, tensor<96x3x3xf32>) outs(%36 : tensor<1x96x129x129xf32>) -> tensor<1x96x129x129xf32>
flow.return %296 : tensor<1x96x129x129xf32>
}
%38 = tensor.empty() : tensor<129x129x1x96xf32>
%39 = flow.dispatch.region -> (tensor<129x129x1x96xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%37 : tensor<1x96x129x129xf32>) outs(%38 : tensor<129x129x1x96xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<129x129x1x96xf32>
flow.return %296 : tensor<129x129x1x96xf32>
}
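// Projection: the 129x129x96 activation is padded to 132x132 and reduced to 24
// channels with a 1x1x96x24 conv (%45).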
%collapsed_119 = tensor.collapse_shape %39 [[0], [1], [2, 3]] : tensor<129x129x1x96xf32> into tensor<129x129x96xf32>
%40 = tensor.empty() : tensor<129x129x24xf32>
%41 = tensor.empty() : tensor<132x132x96xf32>
%42 = linalg.fill ins(%cst_15 : f32) outs(%41 : tensor<132x132x96xf32>) -> tensor<132x132x96xf32>
%inserted_slice_120 = tensor.insert_slice %collapsed_119 into %42[0, 0, 0] [129, 129, 96] [1, 1, 1] : tensor<129x129x96xf32> into tensor<132x132x96xf32>
%expanded_121 = tensor.expand_shape %inserted_slice_120 [[0, 1], [2], [3]] output_shape [1, 132, 132, 96] : tensor<132x132x96xf32> into tensor<1x132x132x96xf32>
%43 = tensor.empty() : tensor<1x132x132x24xf32>
%44 = linalg.fill ins(%cst_15 : f32) outs(%43 : tensor<1x132x132x24xf32>) -> tensor<1x132x132x24xf32>
%45 = flow.dispatch.region -> (tensor<1x132x132x24xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_121, %cst_75 : tensor<1x132x132x96xf32>, tensor<1x1x96x24xf32>) outs(%44 : tensor<1x132x132x24xf32>) -> tensor<1x132x132x24xf32>
flow.return %296 : tensor<1x132x132x24xf32>
}
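// %47 adds the projection bias (%cst_31) and quantizes the result to i8 at scale
// %cst_10; %48 immediately dequantizes it back to f32. The f32 copy %48 is also
// the skip-connection input for the residual add in %68 further down.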
%extracted_slice_122 = tensor.extract_slice %45[0, 0, 0, 0] [1, 129, 129, 24] [1, 1, 1, 1] : tensor<1x132x132x24xf32> to tensor<129x129x24xf32>
%46 = tensor.empty() : tensor<129x129x24xi8>
%47 = flow.dispatch.region -> (tensor<129x129x24xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_122, %cst_31 : tensor<129x129x24xf32>, tensor<24xf32>) outs(%46 : tensor<129x129x24xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<129x129x24xi8>
flow.return %296 : tensor<129x129x24xi8>
}
%48 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%47 : tensor<129x129x24xi8>) outs(%40 : tensor<129x129x24xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_10 : f32
linalg.yield %298 : f32
} -> tensor<129x129x24xf32>
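// Next expand / depthwise / project block at 129x129: expand 24 -> 144 (%54),
// bias/clamp/requantize and pad to 131x131 (%57), depthwise 3x3 with stride 1 in
// NCHW (%62), transpose back with clamp/requantize (%64), project 144 -> 24 (%67).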
%49 = tensor.empty() : tensor<129x129x144xf32>
%50 = tensor.empty() : tensor<132x132x24xf32>
%51 = linalg.fill ins(%cst_15 : f32) outs(%50 : tensor<132x132x24xf32>) -> tensor<132x132x24xf32>
%inserted_slice_123 = tensor.insert_slice %48 into %51[0, 0, 0] [129, 129, 24] [1, 1, 1] : tensor<129x129x24xf32> into tensor<132x132x24xf32>
%expanded_124 = tensor.expand_shape %inserted_slice_123 [[0, 1], [2], [3]] output_shape [1, 132, 132, 24] : tensor<132x132x24xf32> into tensor<1x132x132x24xf32>
%52 = tensor.empty() : tensor<1x132x132x144xf32>
%53 = linalg.fill ins(%cst_15 : f32) outs(%52 : tensor<1x132x132x144xf32>) -> tensor<1x132x132x144xf32>
%54 = flow.dispatch.region -> (tensor<1x132x132x144xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_124, %cst_76 : tensor<1x132x132x24xf32>, tensor<1x1x24x144xf32>) outs(%53 : tensor<1x132x132x144xf32>) -> tensor<1x132x132x144xf32>
flow.return %296 : tensor<1x132x132x144xf32>
}
%extracted_slice_125 = tensor.extract_slice %54[0, 0, 0, 0] [1, 129, 129, 144] [1, 1, 1, 1] : tensor<1x132x132x144xf32> to tensor<129x129x144xf32>
%55 = tensor.empty() : tensor<131x131x144xf32>
%56 = linalg.fill ins(%cst_15 : f32) outs(%55 : tensor<131x131x144xf32>) -> tensor<131x131x144xf32>
%57 = flow.dispatch.region -> (tensor<131x131x144xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_125, %cst_22 : tensor<129x129x144xf32>, tensor<144xf32>) outs(%49 : tensor<129x129x144xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<129x129x144xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %56[1, 1, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<131x131x144xf32>
flow.return %inserted_slice_256 : tensor<131x131x144xf32>
}
%expanded_126 = tensor.expand_shape %57 [[0], [1], [2, 3]] output_shape [131, 131, 1, 144] : tensor<131x131x144xf32> into tensor<131x131x1x144xf32>
%58 = tensor.empty() : tensor<1x144x131x131xf32>
%59 = flow.dispatch.region -> (tensor<1x144x131x131xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_126 : tensor<131x131x1x144xf32>) outs(%58 : tensor<1x144x131x131xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x144x131x131xf32>
flow.return %296 : tensor<1x144x131x131xf32>
}
%60 = tensor.empty() : tensor<1x144x129x129xf32>
%61 = linalg.fill ins(%cst_15 : f32) outs(%60 : tensor<1x144x129x129xf32>) -> tensor<1x144x129x129xf32>
%62 = flow.dispatch.region -> (tensor<1x144x129x129xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%59, %cst_51 : tensor<1x144x131x131xf32>, tensor<144x3x3xf32>) outs(%61 : tensor<1x144x129x129xf32>) -> tensor<1x144x129x129xf32>
flow.return %296 : tensor<1x144x129x129xf32>
}
%63 = tensor.empty() : tensor<129x129x1x144xf32>
%64 = flow.dispatch.region -> (tensor<129x129x1x144xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%62 : tensor<1x144x129x129xf32>) outs(%63 : tensor<129x129x1x144xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<129x129x1x144xf32>
flow.return %296 : tensor<129x129x1x144xf32>
}
%collapsed_127 = tensor.collapse_shape %64 [[0], [1], [2, 3]] : tensor<129x129x1x144xf32> into tensor<129x129x144xf32>
%65 = tensor.empty() : tensor<132x132x144xf32>
%66 = linalg.fill ins(%cst_15 : f32) outs(%65 : tensor<132x132x144xf32>) -> tensor<132x132x144xf32>
%inserted_slice_128 = tensor.insert_slice %collapsed_127 into %66[0, 0, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<132x132x144xf32>
%expanded_129 = tensor.expand_shape %inserted_slice_128 [[0, 1], [2], [3]] output_shape [1, 132, 132, 144] : tensor<132x132x144xf32> into tensor<1x132x132x144xf32>
%67 = flow.dispatch.region -> (tensor<1x132x132x24xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_129, %cst_77 : tensor<1x132x132x144xf32>, tensor<1x1x144x24xf32>) outs(%44 : tensor<1x132x132x24xf32>) -> tensor<1x132x132x24xf32>
flow.return %296 : tensor<1x132x132x24xf32>
}
%extracted_slice_130 = tensor.extract_slice %67[0, 0, 0, 0] [1, 129, 129, 24] [1, 1, 1, 1] : tensor<1x132x132x24xf32> to tensor<129x129x24xf32>
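// Residual add: %68 fuses the projection bias (%cst_52), a quantize/dequantize at
// scale %cst_10, the elementwise add with the skip connection %48, and a second
// quantize/dequantize at scale %cst_13, then pads the result to 132x132x24.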
%68 = flow.dispatch.region -> (tensor<132x132x24xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%48, %extracted_slice_130, %cst_52 : tensor<129x129x24xf32>, tensor<129x129x24xf32>, tensor<24xf32>) outs(%40 : tensor<129x129x24xf32>) {
^bb0(%in: f32, %in_257: f32, %in_258: f32, %out: f32):
%297 = arith.addf %in_257, %in_258 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_10 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_13 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
%318 = arith.extsi %317 : i8 to i32
%319 = arith.sitofp %318 : i32 to f32
%320 = arith.mulf %319, %cst_13 : f32
linalg.yield %320 : f32
} -> tensor<129x129x24xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %51[0, 0, 0] [129, 129, 24] [1, 1, 1] : tensor<129x129x24xf32> into tensor<132x132x24xf32>
flow.return %inserted_slice_256 : tensor<132x132x24xf32>
}
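// Stride-2 block down to 65x65: expand 24 -> 144 (%69), depthwise 3x3 with
// stride 2 in NCHW (%74, 131x131 -> 65x65), transpose back with clamp/requantize
// (%76), then project 144 -> 32 (%82) and quantize to i8 (%84).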
%expanded_131 = tensor.expand_shape %68 [[0, 1], [2], [3]] output_shape [1, 132, 132, 24] : tensor<132x132x24xf32> into tensor<1x132x132x24xf32>
%69 = flow.dispatch.region -> (tensor<1x132x132x144xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_131, %cst_78 : tensor<1x132x132x24xf32>, tensor<1x1x24x144xf32>) outs(%53 : tensor<1x132x132x144xf32>) -> tensor<1x132x132x144xf32>
flow.return %296 : tensor<1x132x132x144xf32>
}
%extracted_slice_132 = tensor.extract_slice %69[0, 0, 0, 0] [1, 129, 129, 144] [1, 1, 1, 1] : tensor<1x132x132x144xf32> to tensor<129x129x144xf32>
%70 = flow.dispatch.region -> (tensor<131x131x144xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_132, %cst_54 : tensor<129x129x144xf32>, tensor<144xf32>) outs(%49 : tensor<129x129x144xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<129x129x144xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %56[1, 1, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<131x131x144xf32>
flow.return %inserted_slice_256 : tensor<131x131x144xf32>
}
%expanded_133 = tensor.expand_shape %70 [[0], [1], [2, 3]] output_shape [131, 131, 1, 144] : tensor<131x131x144xf32> into tensor<131x131x1x144xf32>
%71 = flow.dispatch.region -> (tensor<1x144x131x131xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_133 : tensor<131x131x1x144xf32>) outs(%58 : tensor<1x144x131x131xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x144x131x131xf32>
flow.return %296 : tensor<1x144x131x131xf32>
}
%72 = tensor.empty() : tensor<1x144x65x65xf32>
%73 = linalg.fill ins(%cst_15 : f32) outs(%72 : tensor<1x144x65x65xf32>) -> tensor<1x144x65x65xf32>
%74 = flow.dispatch.region -> (tensor<1x144x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%71, %cst_53 : tensor<1x144x131x131xf32>, tensor<144x3x3xf32>) outs(%73 : tensor<1x144x65x65xf32>) -> tensor<1x144x65x65xf32>
flow.return %296 : tensor<1x144x65x65xf32>
}
%75 = tensor.empty() : tensor<65x65x1x144xf32>
%76 = flow.dispatch.region -> (tensor<65x65x1x144xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%74 : tensor<1x144x65x65xf32>) outs(%75 : tensor<65x65x1x144xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x144xf32>
flow.return %296 : tensor<65x65x1x144xf32>
}
%collapsed_134 = tensor.collapse_shape %76 [[0], [1], [2, 3]] : tensor<65x65x1x144xf32> into tensor<65x65x144xf32>
%77 = tensor.empty() : tensor<65x65x32xf32>
%78 = tensor.empty() : tensor<68x68x144xf32>
%79 = linalg.fill ins(%cst_15 : f32) outs(%78 : tensor<68x68x144xf32>) -> tensor<68x68x144xf32>
%inserted_slice_135 = tensor.insert_slice %collapsed_134 into %79[0, 0, 0] [65, 65, 144] [1, 1, 1] : tensor<65x65x144xf32> into tensor<68x68x144xf32>
%expanded_136 = tensor.expand_shape %inserted_slice_135 [[0, 1], [2], [3]] output_shape [1, 68, 68, 144] : tensor<68x68x144xf32> into tensor<1x68x68x144xf32>
%80 = tensor.empty() : tensor<1x68x68x32xf32>
%81 = linalg.fill ins(%cst_15 : f32) outs(%80 : tensor<1x68x68x32xf32>) -> tensor<1x68x68x32xf32>
%82 = flow.dispatch.region -> (tensor<1x68x68x32xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_136, %cst_79 : tensor<1x68x68x144xf32>, tensor<1x1x144x32xf32>) outs(%81 : tensor<1x68x68x32xf32>) -> tensor<1x68x68x32xf32>
flow.return %296 : tensor<1x68x68x32xf32>
}
%extracted_slice_137 = tensor.extract_slice %82[0, 0, 0, 0] [1, 65, 65, 32] [1, 1, 1, 1] : tensor<1x68x68x32xf32> to tensor<65x65x32xf32>
%83 = tensor.empty() : tensor<65x65x32xi8>
%84 = flow.dispatch.region -> (tensor<65x65x32xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_137, %cst_55 : tensor<65x65x32xf32>, tensor<32xf32>) outs(%83 : tensor<65x65x32xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<65x65x32xi8>
flow.return %296 : tensor<65x65x32xi8>
}
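// %85 dequantizes the 65x65x32 projection at scale %cst_10; %91-%104 form another
// expand (32 -> 192) / depthwise 3x3 / project (192 -> 32) block at 65x65, and
// %105 fuses the residual add with %85 into an i8-producing generic.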
%85 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%84 : tensor<65x65x32xi8>) outs(%77 : tensor<65x65x32xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_10 : f32
linalg.yield %298 : f32
} -> tensor<65x65x32xf32>
%86 = tensor.empty() : tensor<65x65x192xf32>
%87 = tensor.empty() : tensor<68x68x32xf32>
%88 = linalg.fill ins(%cst_15 : f32) outs(%87 : tensor<68x68x32xf32>) -> tensor<68x68x32xf32>
%inserted_slice_138 = tensor.insert_slice %85 into %88[0, 0, 0] [65, 65, 32] [1, 1, 1] : tensor<65x65x32xf32> into tensor<68x68x32xf32>
%expanded_139 = tensor.expand_shape %inserted_slice_138 [[0, 1], [2], [3]] output_shape [1, 68, 68, 32] : tensor<68x68x32xf32> into tensor<1x68x68x32xf32>
%89 = tensor.empty() : tensor<1x68x68x192xf32>
%90 = linalg.fill ins(%cst_15 : f32) outs(%89 : tensor<1x68x68x192xf32>) -> tensor<1x68x68x192xf32>
%91 = flow.dispatch.region -> (tensor<1x68x68x192xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_139, %cst_80 : tensor<1x68x68x32xf32>, tensor<1x1x32x192xf32>) outs(%90 : tensor<1x68x68x192xf32>) -> tensor<1x68x68x192xf32>
flow.return %296 : tensor<1x68x68x192xf32>
}
%extracted_slice_140 = tensor.extract_slice %91[0, 0, 0, 0] [1, 65, 65, 192] [1, 1, 1, 1] : tensor<1x68x68x192xf32> to tensor<65x65x192xf32>
%92 = tensor.empty() : tensor<67x67x192xf32>
%93 = linalg.fill ins(%cst_15 : f32) outs(%92 : tensor<67x67x192xf32>) -> tensor<67x67x192xf32>
%94 = flow.dispatch.region -> (tensor<67x67x192xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_140, %cst_23 : tensor<65x65x192xf32>, tensor<192xf32>) outs(%86 : tensor<65x65x192xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x192xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %93[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32>
flow.return %inserted_slice_256 : tensor<67x67x192xf32>
}
%expanded_141 = tensor.expand_shape %94 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32>
%95 = tensor.empty() : tensor<1x192x67x67xf32>
%96 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_141 : tensor<67x67x1x192xf32>) outs(%95 : tensor<1x192x67x67xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x192x67x67xf32>
flow.return %296 : tensor<1x192x67x67xf32>
}
%97 = tensor.empty() : tensor<1x192x65x65xf32>
%98 = linalg.fill ins(%cst_15 : f32) outs(%97 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
%99 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%96, %cst_56 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%98 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
flow.return %296 : tensor<1x192x65x65xf32>
}
%100 = tensor.empty() : tensor<65x65x1x192xf32>
%101 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%99 : tensor<1x192x65x65xf32>) outs(%100 : tensor<65x65x1x192xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x192xf32>
flow.return %296 : tensor<65x65x1x192xf32>
}
%collapsed_142 = tensor.collapse_shape %101 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32>
%102 = tensor.empty() : tensor<68x68x192xf32>
%103 = linalg.fill ins(%cst_15 : f32) outs(%102 : tensor<68x68x192xf32>) -> tensor<68x68x192xf32>
%inserted_slice_143 = tensor.insert_slice %collapsed_142 into %103[0, 0, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<68x68x192xf32>
%expanded_144 = tensor.expand_shape %inserted_slice_143 [[0, 1], [2], [3]] output_shape [1, 68, 68, 192] : tensor<68x68x192xf32> into tensor<1x68x68x192xf32>
%104 = flow.dispatch.region -> (tensor<1x68x68x32xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_144, %cst_81 : tensor<1x68x68x192xf32>, tensor<1x1x192x32xf32>) outs(%81 : tensor<1x68x68x32xf32>) -> tensor<1x68x68x32xf32>
flow.return %296 : tensor<1x68x68x32xf32>
}
%extracted_slice_145 = tensor.extract_slice %104[0, 0, 0, 0] [1, 65, 65, 32] [1, 1, 1, 1] : tensor<1x68x68x32xf32> to tensor<65x65x32xf32>
%105 = flow.dispatch.region -> (tensor<65x65x32xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%85, %extracted_slice_145, %cst_57 : tensor<65x65x32xf32>, tensor<65x65x32xf32>, tensor<32xf32>) outs(%83 : tensor<65x65x32xi8>) {
^bb0(%in: f32, %in_256: f32, %in_257: f32, %out: i8):
%297 = arith.addf %in_256, %in_257 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_10 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_10 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
linalg.yield %317 : i8
} -> tensor<65x65x32xi8>
flow.return %296 : tensor<65x65x32xi8>
}
%106 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%105 : tensor<65x65x32xi8>) outs(%77 : tensor<65x65x32xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_10 : f32
linalg.yield %298 : f32
} -> tensor<65x65x32xf32>
%inserted_slice_146 = tensor.insert_slice %106 into %88[0, 0, 0] [65, 65, 32] [1, 1, 1] : tensor<65x65x32xf32> into tensor<68x68x32xf32>
%expanded_147 = tensor.expand_shape %inserted_slice_146 [[0, 1], [2], [3]] output_shape [1, 68, 68, 32] : tensor<68x68x32xf32> into tensor<1x68x68x32xf32>
%107 = flow.dispatch.region -> (tensor<1x68x68x192xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_147, %cst_82 : tensor<1x68x68x32xf32>, tensor<1x1x32x192xf32>) outs(%90 : tensor<1x68x68x192xf32>) -> tensor<1x68x68x192xf32>
flow.return %296 : tensor<1x68x68x192xf32>
}
%extracted_slice_148 = tensor.extract_slice %107[0, 0, 0, 0] [1, 65, 65, 192] [1, 1, 1, 1] : tensor<1x68x68x192xf32> to tensor<65x65x192xf32>
%108 = flow.dispatch.region -> (tensor<67x67x192xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_148, %cst_59 : tensor<65x65x192xf32>, tensor<192xf32>) outs(%86 : tensor<65x65x192xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x192xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %93[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32>
flow.return %inserted_slice_256 : tensor<67x67x192xf32>
}
%expanded_149 = tensor.expand_shape %108 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32>
%109 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_149 : tensor<67x67x1x192xf32>) outs(%95 : tensor<1x192x67x67xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x192x67x67xf32>
flow.return %296 : tensor<1x192x67x67xf32>
}
%110 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%109, %cst_58 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%98 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
flow.return %296 : tensor<1x192x65x65xf32>
}
%111 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%110 : tensor<1x192x65x65xf32>) outs(%100 : tensor<65x65x1x192xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x192xf32>
flow.return %296 : tensor<65x65x1x192xf32>
}
%collapsed_150 = tensor.collapse_shape %111 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32>
%inserted_slice_151 = tensor.insert_slice %collapsed_150 into %103[0, 0, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<68x68x192xf32>
%expanded_152 = tensor.expand_shape %inserted_slice_151 [[0, 1], [2], [3]] output_shape [1, 68, 68, 192] : tensor<68x68x192xf32> into tensor<1x68x68x192xf32>
%112 = flow.dispatch.region -> (tensor<1x68x68x32xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_152, %cst_83 : tensor<1x68x68x192xf32>, tensor<1x1x192x32xf32>) outs(%81 : tensor<1x68x68x32xf32>) -> tensor<1x68x68x32xf32>
flow.return %296 : tensor<1x68x68x32xf32>
}
%extracted_slice_153 = tensor.extract_slice %112[0, 0, 0, 0] [1, 65, 65, 32] [1, 1, 1, 1] : tensor<1x68x68x32xf32> to tensor<65x65x32xf32>
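// Second residual add at 65x65x32: %113 fuses the projection bias (%cst_60), a
// quantize/dequantize at scale %cst_10, the add with the previous block output
// %106, and a final requantize, padding the result to 68x68x32 for the next conv.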
%113 = flow.dispatch.region -> (tensor<68x68x32xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%106, %extracted_slice_153, %cst_60 : tensor<65x65x32xf32>, tensor<65x65x32xf32>, tensor<32xf32>) outs(%77 : tensor<65x65x32xf32>) {
^bb0(%in: f32, %in_257: f32, %in_258: f32, %out: f32):
%297 = arith.addf %in_257, %in_258 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_10 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_10 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
%318 = arith.extsi %317 : i8 to i32
%319 = arith.sitofp %318 : i32 to f32
%320 = arith.mulf %319, %cst_10 : f32
linalg.yield %320 : f32
} -> tensor<65x65x32xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %88[0, 0, 0] [65, 65, 32] [1, 1, 1] : tensor<65x65x32xf32> into tensor<68x68x32xf32>
flow.return %inserted_slice_256 : tensor<68x68x32xf32>
}
%expanded_154 = tensor.expand_shape %113 [[0, 1], [2], [3]] output_shape [1, 68, 68, 32] : tensor<68x68x32xf32> into tensor<1x68x68x32xf32>
%114 = flow.dispatch.region -> (tensor<1x68x68x192xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_154, %cst_84 : tensor<1x68x68x32xf32>, tensor<1x1x32x192xf32>) outs(%90 : tensor<1x68x68x192xf32>) -> tensor<1x68x68x192xf32>
flow.return %296 : tensor<1x68x68x192xf32>
}
%extracted_slice_155 = tensor.extract_slice %114[0, 0, 0, 0] [1, 65, 65, 192] [1, 1, 1, 1] : tensor<1x68x68x192xf32> to tensor<65x65x192xf32>
%115 = flow.dispatch.region -> (tensor<67x67x192xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_155, %cst_62 : tensor<65x65x192xf32>, tensor<192xf32>) outs(%86 : tensor<65x65x192xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x192xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %93[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32>
flow.return %inserted_slice_256 : tensor<67x67x192xf32>
}
%expanded_156 = tensor.expand_shape %115 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32>
%116 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_156 : tensor<67x67x1x192xf32>) outs(%95 : tensor<1x192x67x67xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x192x67x67xf32>
flow.return %296 : tensor<1x192x67x67xf32>
}
%117 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%116, %cst_61 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%98 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
flow.return %296 : tensor<1x192x65x65xf32>
}
%118 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%117 : tensor<1x192x65x65xf32>) outs(%100 : tensor<65x65x1x192xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x192xf32>
flow.return %296 : tensor<65x65x1x192xf32>
}
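// Channel increase to 64: the 65x65x192 activation is padded to 68x68 and
// projected with a 1x1x192x64 conv (%122); %124 adds the bias (%cst_63) and
// quantizes to i8 at scale %cst_9, and %125 dequantizes back to f32.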
%collapsed_157 = tensor.collapse_shape %118 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32>
%119 = tensor.empty() : tensor<65x65x64xf32>
%inserted_slice_158 = tensor.insert_slice %collapsed_157 into %103[0, 0, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<68x68x192xf32>
%expanded_159 = tensor.expand_shape %inserted_slice_158 [[0, 1], [2], [3]] output_shape [1, 68, 68, 192] : tensor<68x68x192xf32> into tensor<1x68x68x192xf32>
%120 = tensor.empty() : tensor<1x68x68x64xf32>
%121 = linalg.fill ins(%cst_15 : f32) outs(%120 : tensor<1x68x68x64xf32>) -> tensor<1x68x68x64xf32>
%122 = flow.dispatch.region -> (tensor<1x68x68x64xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_159, %cst_85 : tensor<1x68x68x192xf32>, tensor<1x1x192x64xf32>) outs(%121 : tensor<1x68x68x64xf32>) -> tensor<1x68x68x64xf32>
flow.return %296 : tensor<1x68x68x64xf32>
}
%extracted_slice_160 = tensor.extract_slice %122[0, 0, 0, 0] [1, 65, 65, 64] [1, 1, 1, 1] : tensor<1x68x68x64xf32> to tensor<65x65x64xf32>
%123 = tensor.empty() : tensor<65x65x64xi8>
%124 = flow.dispatch.region -> (tensor<65x65x64xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_160, %cst_63 : tensor<65x65x64xf32>, tensor<64xf32>) outs(%123 : tensor<65x65x64xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<65x65x64xi8>
flow.return %296 : tensor<65x65x64xi8>
}
%125 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%124 : tensor<65x65x64xi8>) outs(%119 : tensor<65x65x64xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_9 : f32
linalg.yield %298 : f32
} -> tensor<65x65x64xf32>
%126 = tensor.empty() : tensor<65x65x384xf32>
%127 = tensor.empty() : tensor<68x68x64xf32>
%128 = linalg.fill ins(%cst_15 : f32) outs(%127 : tensor<68x68x64xf32>) -> tensor<68x68x64xf32>
%inserted_slice_161 = tensor.insert_slice %125 into %128[0, 0, 0] [65, 65, 64] [1, 1, 1] : tensor<65x65x64xf32> into tensor<68x68x64xf32>
%expanded_162 = tensor.expand_shape %inserted_slice_161 [[0, 1], [2], [3]] output_shape [1, 68, 68, 64] : tensor<68x68x64xf32> into tensor<1x68x68x64xf32>
%129 = tensor.empty() : tensor<1x68x68x384xf32>
%130 = linalg.fill ins(%cst_15 : f32) outs(%129 : tensor<1x68x68x384xf32>) -> tensor<1x68x68x384xf32>
%131 = flow.dispatch.region -> (tensor<1x68x68x384xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_162, %cst_86 : tensor<1x68x68x64xf32>, tensor<1x1x64x384xf32>) outs(%130 : tensor<1x68x68x384xf32>) -> tensor<1x68x68x384xf32>
flow.return %296 : tensor<1x68x68x384xf32>
}
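// Dilated (atrous) stage: the 384-channel activation is padded by 2 on every
// spatial edge (insert at offset [2, 2, 0] into a 69x69 buffer) so the 3x3
// depthwise conv with dilation 2 and stride 1 (%139) keeps the 65x65 resolution.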
%extracted_slice_163 = tensor.extract_slice %131[0, 0, 0, 0] [1, 65, 65, 384] [1, 1, 1, 1] : tensor<1x68x68x384xf32> to tensor<65x65x384xf32>
%132 = tensor.empty() : tensor<69x69x384xf32>
%133 = linalg.fill ins(%cst_15 : f32) outs(%132 : tensor<69x69x384xf32>) -> tensor<69x69x384xf32>
%134 = flow.dispatch.region -> (tensor<69x69x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_163, %cst_24 : tensor<65x65x384xf32>, tensor<384xf32>) outs(%126 : tensor<65x65x384xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x384xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %133[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
flow.return %inserted_slice_256 : tensor<69x69x384xf32>
}
%expanded_164 = tensor.expand_shape %134 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%135 = tensor.empty() : tensor<1x384x69x69xf32>
%136 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_164 : tensor<69x69x1x384xf32>) outs(%135 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %296 : tensor<1x384x69x69xf32>
}
%137 = tensor.empty() : tensor<1x384x65x65xf32>
%138 = linalg.fill ins(%cst_15 : f32) outs(%137 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
%139 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%136, %cst_64 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%138 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %296 : tensor<1x384x65x65xf32>
}
%140 = tensor.empty() : tensor<65x65x1x384xf32>
%141 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%139 : tensor<1x384x65x65xf32>) outs(%140 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x384xf32>
flow.return %296 : tensor<65x65x1x384xf32>
}
%collapsed_165 = tensor.collapse_shape %141 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%142 = tensor.empty() : tensor<68x68x384xf32>
%143 = linalg.fill ins(%cst_15 : f32) outs(%142 : tensor<68x68x384xf32>) -> tensor<68x68x384xf32>
%inserted_slice_166 = tensor.insert_slice %collapsed_165 into %143[0, 0, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<68x68x384xf32>
%expanded_167 = tensor.expand_shape %inserted_slice_166 [[0, 1], [2], [3]] output_shape [1, 68, 68, 384] : tensor<68x68x384xf32> into tensor<1x68x68x384xf32>
%144 = flow.dispatch.region -> (tensor<1x68x68x64xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_167, %cst_87 : tensor<1x68x68x384xf32>, tensor<1x1x384x64xf32>) outs(%121 : tensor<1x68x68x64xf32>) -> tensor<1x68x68x64xf32>
flow.return %296 : tensor<1x68x68x64xf32>
}
%extracted_slice_168 = tensor.extract_slice %144[0, 0, 0, 0] [1, 65, 65, 64] [1, 1, 1, 1] : tensor<1x68x68x64xf32> to tensor<65x65x64xf32>
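// Residual add at 64 channels: %145 fuses the bias (%cst_65), a quantize/dequantize
// at scale %cst_9, the add with %125, and quantization to i8 at scale %cst_10;
// %146 dequantizes, and the same dilated expand / depthwise / project pattern
// repeats below.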
%145 = flow.dispatch.region -> (tensor<65x65x64xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%125, %extracted_slice_168, %cst_65 : tensor<65x65x64xf32>, tensor<65x65x64xf32>, tensor<64xf32>) outs(%123 : tensor<65x65x64xi8>) {
^bb0(%in: f32, %in_256: f32, %in_257: f32, %out: i8):
%297 = arith.addf %in_256, %in_257 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_9 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_10 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
linalg.yield %317 : i8
} -> tensor<65x65x64xi8>
flow.return %296 : tensor<65x65x64xi8>
}
%146 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%145 : tensor<65x65x64xi8>) outs(%119 : tensor<65x65x64xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_10 : f32
linalg.yield %298 : f32
} -> tensor<65x65x64xf32>
%inserted_slice_169 = tensor.insert_slice %146 into %128[0, 0, 0] [65, 65, 64] [1, 1, 1] : tensor<65x65x64xf32> into tensor<68x68x64xf32>
%expanded_170 = tensor.expand_shape %inserted_slice_169 [[0, 1], [2], [3]] output_shape [1, 68, 68, 64] : tensor<68x68x64xf32> into tensor<1x68x68x64xf32>
%147 = flow.dispatch.region -> (tensor<1x68x68x384xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_170, %cst_88 : tensor<1x68x68x64xf32>, tensor<1x1x64x384xf32>) outs(%130 : tensor<1x68x68x384xf32>) -> tensor<1x68x68x384xf32>
flow.return %296 : tensor<1x68x68x384xf32>
}
%extracted_slice_171 = tensor.extract_slice %147[0, 0, 0, 0] [1, 65, 65, 384] [1, 1, 1, 1] : tensor<1x68x68x384xf32> to tensor<65x65x384xf32>
%148 = flow.dispatch.region -> (tensor<69x69x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_171, %cst_67 : tensor<65x65x384xf32>, tensor<384xf32>) outs(%126 : tensor<65x65x384xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x384xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %133[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
flow.return %inserted_slice_256 : tensor<69x69x384xf32>
}
%expanded_172 = tensor.expand_shape %148 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%149 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_172 : tensor<69x69x1x384xf32>) outs(%135 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %296 : tensor<1x384x69x69xf32>
}
%150 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%149, %cst_66 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%138 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %296 : tensor<1x384x65x65xf32>
}
%151 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150 : tensor<1x384x65x65xf32>) outs(%140 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x384xf32>
flow.return %296 : tensor<65x65x1x384xf32>
}
%collapsed_173 = tensor.collapse_shape %151 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%inserted_slice_174 = tensor.insert_slice %collapsed_173 into %143[0, 0, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<68x68x384xf32>
%expanded_175 = tensor.expand_shape %inserted_slice_174 [[0, 1], [2], [3]] output_shape [1, 68, 68, 384] : tensor<68x68x384xf32> into tensor<1x68x68x384xf32>
%152 = flow.dispatch.region -> (tensor<1x68x68x64xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_175, %cst_89 : tensor<1x68x68x384xf32>, tensor<1x1x384x64xf32>) outs(%121 : tensor<1x68x68x64xf32>) -> tensor<1x68x68x64xf32>
flow.return %296 : tensor<1x68x68x64xf32>
}
%extracted_slice_176 = tensor.extract_slice %152[0, 0, 0, 0] [1, 65, 65, 64] [1, 1, 1, 1] : tensor<1x68x68x64xf32> to tensor<65x65x64xf32>
%153 = flow.dispatch.region -> (tensor<65x65x64xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%146, %extracted_slice_176, %cst_68 : tensor<65x65x64xf32>, tensor<65x65x64xf32>, tensor<64xf32>) outs(%123 : tensor<65x65x64xi8>) {
^bb0(%in: f32, %in_256: f32, %in_257: f32, %out: i8):
%297 = arith.addf %in_256, %in_257 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_9 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_10 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
linalg.yield %317 : i8
} -> tensor<65x65x64xi8>
flow.return %296 : tensor<65x65x64xi8>
}
%154 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%153 : tensor<65x65x64xi8>) outs(%119 : tensor<65x65x64xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_10 : f32
linalg.yield %298 : f32
} -> tensor<65x65x64xf32>
%inserted_slice_177 = tensor.insert_slice %154 into %128[0, 0, 0] [65, 65, 64] [1, 1, 1] : tensor<65x65x64xf32> into tensor<68x68x64xf32>
%expanded_178 = tensor.expand_shape %inserted_slice_177 [[0, 1], [2], [3]] output_shape [1, 68, 68, 64] : tensor<68x68x64xf32> into tensor<1x68x68x64xf32>
%155 = flow.dispatch.region -> (tensor<1x68x68x384xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_178, %cst_90 : tensor<1x68x68x64xf32>, tensor<1x1x64x384xf32>) outs(%130 : tensor<1x68x68x384xf32>) -> tensor<1x68x68x384xf32>
flow.return %296 : tensor<1x68x68x384xf32>
}
%extracted_slice_179 = tensor.extract_slice %155[0, 0, 0, 0] [1, 65, 65, 384] [1, 1, 1, 1] : tensor<1x68x68x384xf32> to tensor<65x65x384xf32>
%156 = flow.dispatch.region -> (tensor<69x69x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_179, %cst_70 : tensor<65x65x384xf32>, tensor<384xf32>) outs(%126 : tensor<65x65x384xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x384xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %133[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
flow.return %inserted_slice_256 : tensor<69x69x384xf32>
}
%expanded_180 = tensor.expand_shape %156 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%157 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_180 : tensor<69x69x1x384xf32>) outs(%135 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %296 : tensor<1x384x69x69xf32>
}
%158 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%157, %cst_69 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%138 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %296 : tensor<1x384x65x65xf32>
}
%159 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%158 : tensor<1x384x65x65xf32>) outs(%140 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x384xf32>
flow.return %296 : tensor<65x65x1x384xf32>
}
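    // Dilated depthwise stage: the activation is placed at offset [2, 2, 0] inside a 69x69 buffer (symmetric padding of 2 for the 3x3 depthwise conv with dilation 2), transposed to NCHW for linalg.depthwise_conv_2d_nchw_chw, then transposed back to HWC in %159 with the clamp and i8 requantization fused into the same generic.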
%collapsed_181 = tensor.collapse_shape %159 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%inserted_slice_182 = tensor.insert_slice %collapsed_181 into %143[0, 0, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<68x68x384xf32>
%expanded_183 = tensor.expand_shape %inserted_slice_182 [[0, 1], [2], [3]] output_shape [1, 68, 68, 384] : tensor<68x68x384xf32> into tensor<1x68x68x384xf32>
%160 = flow.dispatch.region -> (tensor<1x68x68x64xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_183, %cst_91 : tensor<1x68x68x384xf32>, tensor<1x1x384x64xf32>) outs(%121 : tensor<1x68x68x64xf32>) -> tensor<1x68x68x64xf32>
flow.return %296 : tensor<1x68x68x64xf32>
}
%extracted_slice_184 = tensor.extract_slice %160[0, 0, 0, 0] [1, 65, 65, 64] [1, 1, 1, 1] : tensor<1x68x68x64xf32> to tensor<65x65x64xf32>
%161 = flow.dispatch.region -> (tensor<68x68x64xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%154, %extracted_slice_184, %cst_71 : tensor<65x65x64xf32>, tensor<65x65x64xf32>, tensor<64xf32>) outs(%119 : tensor<65x65x64xf32>) {
^bb0(%in: f32, %in_257: f32, %in_258: f32, %out: f32):
%297 = arith.addf %in_257, %in_258 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_9 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_10 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
%318 = arith.extsi %317 : i8 to i32
%319 = arith.sitofp %318 : i32 to f32
%320 = arith.mulf %319, %cst_10 : f32
linalg.yield %320 : f32
} -> tensor<65x65x64xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %128[0, 0, 0] [65, 65, 64] [1, 1, 1] : tensor<65x65x64xf32> into tensor<68x68x64xf32>
flow.return %inserted_slice_256 : tensor<68x68x64xf32>
}
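    // %161 fuses the inverted-residual skip connection: the projected branch gets its bias (%cst_71), is quantized and dequantized at one scale (%cst_9), added to the skip input %154, then requantized at a second scale (%cst_10, presumably the block's output scale) before being re-padded to 68x68.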
%expanded_185 = tensor.expand_shape %161 [[0, 1], [2], [3]] output_shape [1, 68, 68, 64] : tensor<68x68x64xf32> into tensor<1x68x68x64xf32>
%162 = flow.dispatch.region -> (tensor<1x68x68x384xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_185, %cst_92 : tensor<1x68x68x64xf32>, tensor<1x1x64x384xf32>) outs(%130 : tensor<1x68x68x384xf32>) -> tensor<1x68x68x384xf32>
flow.return %296 : tensor<1x68x68x384xf32>
}
%extracted_slice_186 = tensor.extract_slice %162[0, 0, 0, 0] [1, 65, 65, 384] [1, 1, 1, 1] : tensor<1x68x68x384xf32> to tensor<65x65x384xf32>
%163 = flow.dispatch.region -> (tensor<69x69x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_186, %cst_33 : tensor<65x65x384xf32>, tensor<384xf32>) outs(%126 : tensor<65x65x384xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x384xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %133[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
flow.return %inserted_slice_256 : tensor<69x69x384xf32>
}
%expanded_187 = tensor.expand_shape %163 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%164 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_187 : tensor<69x69x1x384xf32>) outs(%135 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %296 : tensor<1x384x69x69xf32>
}
%165 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%164, %cst_32 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%138 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %296 : tensor<1x384x65x65xf32>
}
%166 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%165 : tensor<1x384x65x65xf32>) outs(%140 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x384xf32>
flow.return %296 : tensor<65x65x1x384xf32>
}
%collapsed_188 = tensor.collapse_shape %166 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%167 = tensor.empty() : tensor<65x65x96xf32>
%inserted_slice_189 = tensor.insert_slice %collapsed_188 into %143[0, 0, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<68x68x384xf32>
%expanded_190 = tensor.expand_shape %inserted_slice_189 [[0, 1], [2], [3]] output_shape [1, 68, 68, 384] : tensor<68x68x384xf32> into tensor<1x68x68x384xf32>
%168 = tensor.empty() : tensor<1x68x68x96xf32>
%169 = linalg.fill ins(%cst_15 : f32) outs(%168 : tensor<1x68x68x96xf32>) -> tensor<1x68x68x96xf32>
%170 = flow.dispatch.region -> (tensor<1x68x68x96xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_190, %cst_93 : tensor<1x68x68x384xf32>, tensor<1x1x384x96xf32>) outs(%169 : tensor<1x68x68x96xf32>) -> tensor<1x68x68x96xf32>
flow.return %296 : tensor<1x68x68x96xf32>
}
%extracted_slice_191 = tensor.extract_slice %170[0, 0, 0, 0] [1, 65, 65, 96] [1, 1, 1, 1] : tensor<1x68x68x96xf32> to tensor<65x65x96xf32>
%171 = tensor.empty() : tensor<65x65x96xi8>
%172 = flow.dispatch.region -> (tensor<65x65x96xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_191, %cst_34 : tensor<65x65x96xf32>, tensor<96xf32>) outs(%171 : tensor<65x65x96xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<65x65x96xi8>
flow.return %296 : tensor<65x65x96xi8>
}
%173 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%172 : tensor<65x65x96xi8>) outs(%167 : tensor<65x65x96xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_9 : f32
linalg.yield %298 : f32
} -> tensor<65x65x96xf32>
%174 = tensor.empty() : tensor<65x65x576xf32>
%175 = tensor.empty() : tensor<68x68x96xf32>
%176 = linalg.fill ins(%cst_15 : f32) outs(%175 : tensor<68x68x96xf32>) -> tensor<68x68x96xf32>
%inserted_slice_192 = tensor.insert_slice %173 into %176[0, 0, 0] [65, 65, 96] [1, 1, 1] : tensor<65x65x96xf32> into tensor<68x68x96xf32>
%expanded_193 = tensor.expand_shape %inserted_slice_192 [[0, 1], [2], [3]] output_shape [1, 68, 68, 96] : tensor<68x68x96xf32> into tensor<1x68x68x96xf32>
%177 = tensor.empty() : tensor<1x68x68x576xf32>
%178 = linalg.fill ins(%cst_15 : f32) outs(%177 : tensor<1x68x68x576xf32>) -> tensor<1x68x68x576xf32>
%179 = flow.dispatch.region -> (tensor<1x68x68x576xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_193, %cst_94 : tensor<1x68x68x96xf32>, tensor<1x1x96x576xf32>) outs(%178 : tensor<1x68x68x576xf32>) -> tensor<1x68x68x576xf32>
flow.return %296 : tensor<1x68x68x576xf32>
}
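    // The same expand / depthwise / project structure now repeats at larger widths (96 -> 576 here, 160 -> 960 further down), which is consistent with a MobileNetV2-style backbone; only the channel counts, quantization scales, and depthwise dilation change between blocks.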
%extracted_slice_194 = tensor.extract_slice %179[0, 0, 0, 0] [1, 65, 65, 576] [1, 1, 1, 1] : tensor<1x68x68x576xf32> to tensor<65x65x576xf32>
%180 = tensor.empty() : tensor<69x69x576xf32>
%181 = linalg.fill ins(%cst_15 : f32) outs(%180 : tensor<69x69x576xf32>) -> tensor<69x69x576xf32>
%182 = flow.dispatch.region -> (tensor<69x69x576xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_194, %cst_20 : tensor<65x65x576xf32>, tensor<576xf32>) outs(%174 : tensor<65x65x576xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x576xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %181[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32>
flow.return %inserted_slice_256 : tensor<69x69x576xf32>
}
%expanded_195 = tensor.expand_shape %182 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32>
%183 = tensor.empty() : tensor<1x576x69x69xf32>
%184 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_195 : tensor<69x69x1x576xf32>) outs(%183 : tensor<1x576x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x576x69x69xf32>
flow.return %296 : tensor<1x576x69x69xf32>
}
%185 = tensor.empty() : tensor<1x576x65x65xf32>
%186 = linalg.fill ins(%cst_15 : f32) outs(%185 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
%187 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%184, %cst_35 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%186 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
flow.return %296 : tensor<1x576x65x65xf32>
}
%188 = tensor.empty() : tensor<65x65x1x576xf32>
%189 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%187 : tensor<1x576x65x65xf32>) outs(%188 : tensor<65x65x1x576xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x576xf32>
flow.return %296 : tensor<65x65x1x576xf32>
}
%collapsed_196 = tensor.collapse_shape %189 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32>
%190 = tensor.empty() : tensor<68x68x576xf32>
%191 = linalg.fill ins(%cst_15 : f32) outs(%190 : tensor<68x68x576xf32>) -> tensor<68x68x576xf32>
%inserted_slice_197 = tensor.insert_slice %collapsed_196 into %191[0, 0, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<68x68x576xf32>
%expanded_198 = tensor.expand_shape %inserted_slice_197 [[0, 1], [2], [3]] output_shape [1, 68, 68, 576] : tensor<68x68x576xf32> into tensor<1x68x68x576xf32>
%192 = flow.dispatch.region -> (tensor<1x68x68x96xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_198, %cst_95 : tensor<1x68x68x576xf32>, tensor<1x1x576x96xf32>) outs(%169 : tensor<1x68x68x96xf32>) -> tensor<1x68x68x96xf32>
flow.return %296 : tensor<1x68x68x96xf32>
}
%extracted_slice_199 = tensor.extract_slice %192[0, 0, 0, 0] [1, 65, 65, 96] [1, 1, 1, 1] : tensor<1x68x68x96xf32> to tensor<65x65x96xf32>
%193 = flow.dispatch.region -> (tensor<65x65x96xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%173, %extracted_slice_199, %cst_36 : tensor<65x65x96xf32>, tensor<65x65x96xf32>, tensor<96xf32>) outs(%171 : tensor<65x65x96xi8>) {
^bb0(%in: f32, %in_256: f32, %in_257: f32, %out: i8):
%297 = arith.addf %in_256, %in_257 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_9 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_9 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
linalg.yield %317 : i8
} -> tensor<65x65x96xi8>
flow.return %296 : tensor<65x65x96xi8>
}
%194 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%193 : tensor<65x65x96xi8>) outs(%167 : tensor<65x65x96xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_9 : f32
linalg.yield %298 : f32
} -> tensor<65x65x96xf32>
%inserted_slice_200 = tensor.insert_slice %194 into %176[0, 0, 0] [65, 65, 96] [1, 1, 1] : tensor<65x65x96xf32> into tensor<68x68x96xf32>
%expanded_201 = tensor.expand_shape %inserted_slice_200 [[0, 1], [2], [3]] output_shape [1, 68, 68, 96] : tensor<68x68x96xf32> into tensor<1x68x68x96xf32>
%195 = flow.dispatch.region -> (tensor<1x68x68x576xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_201, %cst_96 : tensor<1x68x68x96xf32>, tensor<1x1x96x576xf32>) outs(%178 : tensor<1x68x68x576xf32>) -> tensor<1x68x68x576xf32>
flow.return %296 : tensor<1x68x68x576xf32>
}
%extracted_slice_202 = tensor.extract_slice %195[0, 0, 0, 0] [1, 65, 65, 576] [1, 1, 1, 1] : tensor<1x68x68x576xf32> to tensor<65x65x576xf32>
%196 = flow.dispatch.region -> (tensor<69x69x576xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_202, %cst_38 : tensor<65x65x576xf32>, tensor<576xf32>) outs(%174 : tensor<65x65x576xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x576xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %181[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32>
flow.return %inserted_slice_256 : tensor<69x69x576xf32>
}
%expanded_203 = tensor.expand_shape %196 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32>
%197 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_203 : tensor<69x69x1x576xf32>) outs(%183 : tensor<1x576x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x576x69x69xf32>
flow.return %296 : tensor<1x576x69x69xf32>
}
%198 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%197, %cst_37 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%186 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
flow.return %296 : tensor<1x576x65x65xf32>
}
%199 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%198 : tensor<1x576x65x65xf32>) outs(%188 : tensor<65x65x1x576xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x576xf32>
flow.return %296 : tensor<65x65x1x576xf32>
}
%collapsed_204 = tensor.collapse_shape %199 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32>
%inserted_slice_205 = tensor.insert_slice %collapsed_204 into %191[0, 0, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<68x68x576xf32>
%expanded_206 = tensor.expand_shape %inserted_slice_205 [[0, 1], [2], [3]] output_shape [1, 68, 68, 576] : tensor<68x68x576xf32> into tensor<1x68x68x576xf32>
%200 = flow.dispatch.region -> (tensor<1x68x68x96xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_206, %cst_97 : tensor<1x68x68x576xf32>, tensor<1x1x576x96xf32>) outs(%169 : tensor<1x68x68x96xf32>) -> tensor<1x68x68x96xf32>
flow.return %296 : tensor<1x68x68x96xf32>
}
%extracted_slice_207 = tensor.extract_slice %200[0, 0, 0, 0] [1, 65, 65, 96] [1, 1, 1, 1] : tensor<1x68x68x96xf32> to tensor<65x65x96xf32>
%201 = flow.dispatch.region -> (tensor<68x68x96xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%194, %extracted_slice_207, %cst_39 : tensor<65x65x96xf32>, tensor<65x65x96xf32>, tensor<96xf32>) outs(%167 : tensor<65x65x96xf32>) {
^bb0(%in: f32, %in_257: f32, %in_258: f32, %out: f32):
%297 = arith.addf %in_257, %in_258 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_9 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_9 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
%318 = arith.extsi %317 : i8 to i32
%319 = arith.sitofp %318 : i32 to f32
%320 = arith.mulf %319, %cst_9 : f32
linalg.yield %320 : f32
} -> tensor<65x65x96xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %176[0, 0, 0] [65, 65, 96] [1, 1, 1] : tensor<65x65x96xf32> into tensor<68x68x96xf32>
flow.return %inserted_slice_256 : tensor<68x68x96xf32>
}
%expanded_208 = tensor.expand_shape %201 [[0, 1], [2], [3]] output_shape [1, 68, 68, 96] : tensor<68x68x96xf32> into tensor<1x68x68x96xf32>
%202 = flow.dispatch.region -> (tensor<1x68x68x576xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_208, %cst_98 : tensor<1x68x68x96xf32>, tensor<1x1x96x576xf32>) outs(%178 : tensor<1x68x68x576xf32>) -> tensor<1x68x68x576xf32>
flow.return %296 : tensor<1x68x68x576xf32>
}
%extracted_slice_209 = tensor.extract_slice %202[0, 0, 0, 0] [1, 65, 65, 576] [1, 1, 1, 1] : tensor<1x68x68x576xf32> to tensor<65x65x576xf32>
%203 = flow.dispatch.region -> (tensor<69x69x576xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_209, %cst_41 : tensor<65x65x576xf32>, tensor<576xf32>) outs(%174 : tensor<65x65x576xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x576xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %181[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32>
flow.return %inserted_slice_256 : tensor<69x69x576xf32>
}
%expanded_210 = tensor.expand_shape %203 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32>
%204 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_210 : tensor<69x69x1x576xf32>) outs(%183 : tensor<1x576x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x576x69x69xf32>
flow.return %296 : tensor<1x576x69x69xf32>
}
%205 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%204, %cst_40 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%186 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
flow.return %296 : tensor<1x576x65x65xf32>
}
%206 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%205 : tensor<1x576x65x65xf32>) outs(%188 : tensor<65x65x1x576xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x576xf32>
flow.return %296 : tensor<65x65x1x576xf32>
}
%collapsed_211 = tensor.collapse_shape %206 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32>
%207 = tensor.empty() : tensor<65x65x160xf32>
%inserted_slice_212 = tensor.insert_slice %collapsed_211 into %191[0, 0, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<68x68x576xf32>
%expanded_213 = tensor.expand_shape %inserted_slice_212 [[0, 1], [2], [3]] output_shape [1, 68, 68, 576] : tensor<68x68x576xf32> into tensor<1x68x68x576xf32>
%208 = tensor.empty() : tensor<1x68x68x160xf32>
%209 = linalg.fill ins(%cst_15 : f32) outs(%208 : tensor<1x68x68x160xf32>) -> tensor<1x68x68x160xf32>
%210 = flow.dispatch.region -> (tensor<1x68x68x160xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_213, %cst_99 : tensor<1x68x68x576xf32>, tensor<1x1x576x160xf32>) outs(%209 : tensor<1x68x68x160xf32>) -> tensor<1x68x68x160xf32>
flow.return %296 : tensor<1x68x68x160xf32>
}
%extracted_slice_214 = tensor.extract_slice %210[0, 0, 0, 0] [1, 65, 65, 160] [1, 1, 1, 1] : tensor<1x68x68x160xf32> to tensor<65x65x160xf32>
%211 = tensor.empty() : tensor<65x65x160xi8>
%212 = flow.dispatch.region -> (tensor<65x65x160xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_214, %cst_42 : tensor<65x65x160xf32>, tensor<160xf32>) outs(%211 : tensor<65x65x160xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<65x65x160xi8>
flow.return %296 : tensor<65x65x160xi8>
}
%213 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%212 : tensor<65x65x160xi8>) outs(%207 : tensor<65x65x160xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_9 : f32
linalg.yield %298 : f32
} -> tensor<65x65x160xf32>
%214 = tensor.empty() : tensor<65x65x960xf32>
%215 = tensor.empty() : tensor<68x68x160xf32>
%216 = linalg.fill ins(%cst_15 : f32) outs(%215 : tensor<68x68x160xf32>) -> tensor<68x68x160xf32>
%inserted_slice_215 = tensor.insert_slice %213 into %216[0, 0, 0] [65, 65, 160] [1, 1, 1] : tensor<65x65x160xf32> into tensor<68x68x160xf32>
%expanded_216 = tensor.expand_shape %inserted_slice_215 [[0, 1], [2], [3]] output_shape [1, 68, 68, 160] : tensor<68x68x160xf32> into tensor<1x68x68x160xf32>
%217 = tensor.empty() : tensor<1x68x68x960xf32>
%218 = linalg.fill ins(%cst_15 : f32) outs(%217 : tensor<1x68x68x960xf32>) -> tensor<1x68x68x960xf32>
%219 = flow.dispatch.region -> (tensor<1x68x68x960xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_216, %cst_100 : tensor<1x68x68x160xf32>, tensor<1x1x160x960xf32>) outs(%218 : tensor<1x68x68x960xf32>) -> tensor<1x68x68x960xf32>
flow.return %296 : tensor<1x68x68x960xf32>
}
%extracted_slice_217 = tensor.extract_slice %219[0, 0, 0, 0] [1, 65, 65, 960] [1, 1, 1, 1] : tensor<1x68x68x960xf32> to tensor<65x65x960xf32>
%220 = tensor.empty() : tensor<73x73x960xf32>
%221 = linalg.fill ins(%cst_15 : f32) outs(%220 : tensor<73x73x960xf32>) -> tensor<73x73x960xf32>
%222 = flow.dispatch.region -> (tensor<73x73x960xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_217, %cst_21 : tensor<65x65x960xf32>, tensor<960xf32>) outs(%214 : tensor<65x65x960xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x960xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %221[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32>
flow.return %inserted_slice_256 : tensor<73x73x960xf32>
}
%expanded_218 = tensor.expand_shape %222 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32>
%223 = tensor.empty() : tensor<1x960x73x73xf32>
%224 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_218 : tensor<73x73x1x960xf32>) outs(%223 : tensor<1x960x73x73xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x960x73x73xf32>
flow.return %296 : tensor<1x960x73x73xf32>
}
%225 = tensor.empty() : tensor<1x960x65x65xf32>
%226 = linalg.fill ins(%cst_15 : f32) outs(%225 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
%227 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%224, %cst_43 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%226 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
flow.return %296 : tensor<1x960x65x65xf32>
}
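    // For the 960-channel blocks the depthwise conv uses dilation 4, so the padding grows accordingly: the 65x65 activation is inserted at offset [4, 4, 0] into a 73x73 buffer before the NCHW depthwise conv, which brings the spatial size back to 65x65.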
%228 = tensor.empty() : tensor<65x65x1x960xf32>
%229 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%227 : tensor<1x960x65x65xf32>) outs(%228 : tensor<65x65x1x960xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x960xf32>
flow.return %296 : tensor<65x65x1x960xf32>
}
%collapsed_219 = tensor.collapse_shape %229 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32>
%230 = tensor.empty() : tensor<68x68x960xf32>
%231 = linalg.fill ins(%cst_15 : f32) outs(%230 : tensor<68x68x960xf32>) -> tensor<68x68x960xf32>
%inserted_slice_220 = tensor.insert_slice %collapsed_219 into %231[0, 0, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<68x68x960xf32>
%expanded_221 = tensor.expand_shape %inserted_slice_220 [[0, 1], [2], [3]] output_shape [1, 68, 68, 960] : tensor<68x68x960xf32> into tensor<1x68x68x960xf32>
%232 = flow.dispatch.region -> (tensor<1x68x68x160xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_221, %cst_101 : tensor<1x68x68x960xf32>, tensor<1x1x960x160xf32>) outs(%209 : tensor<1x68x68x160xf32>) -> tensor<1x68x68x160xf32>
flow.return %296 : tensor<1x68x68x160xf32>
}
%extracted_slice_222 = tensor.extract_slice %232[0, 0, 0, 0] [1, 65, 65, 160] [1, 1, 1, 1] : tensor<1x68x68x160xf32> to tensor<65x65x160xf32>
%233 = flow.dispatch.region -> (tensor<65x65x160xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%213, %extracted_slice_222, %cst_44 : tensor<65x65x160xf32>, tensor<65x65x160xf32>, tensor<160xf32>) outs(%211 : tensor<65x65x160xi8>) {
^bb0(%in: f32, %in_256: f32, %in_257: f32, %out: i8):
%297 = arith.addf %in_256, %in_257 : f32
%298 = arith.divf %297, %cst_11 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_11 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_9 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
linalg.yield %317 : i8
} -> tensor<65x65x160xi8>
flow.return %296 : tensor<65x65x160xi8>
}
%234 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%233 : tensor<65x65x160xi8>) outs(%207 : tensor<65x65x160xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_9 : f32
linalg.yield %298 : f32
} -> tensor<65x65x160xf32>
%inserted_slice_223 = tensor.insert_slice %234 into %216[0, 0, 0] [65, 65, 160] [1, 1, 1] : tensor<65x65x160xf32> into tensor<68x68x160xf32>
%expanded_224 = tensor.expand_shape %inserted_slice_223 [[0, 1], [2], [3]] output_shape [1, 68, 68, 160] : tensor<68x68x160xf32> into tensor<1x68x68x160xf32>
%235 = flow.dispatch.region -> (tensor<1x68x68x960xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_224, %cst_102 : tensor<1x68x68x160xf32>, tensor<1x1x160x960xf32>) outs(%218 : tensor<1x68x68x960xf32>) -> tensor<1x68x68x960xf32>
flow.return %296 : tensor<1x68x68x960xf32>
}
%extracted_slice_225 = tensor.extract_slice %235[0, 0, 0, 0] [1, 65, 65, 960] [1, 1, 1, 1] : tensor<1x68x68x960xf32> to tensor<65x65x960xf32>
%236 = flow.dispatch.region -> (tensor<73x73x960xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_225, %cst_46 : tensor<65x65x960xf32>, tensor<960xf32>) outs(%214 : tensor<65x65x960xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x960xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %221[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32>
flow.return %inserted_slice_256 : tensor<73x73x960xf32>
}
%expanded_226 = tensor.expand_shape %236 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32>
%237 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_226 : tensor<73x73x1x960xf32>) outs(%223 : tensor<1x960x73x73xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x960x73x73xf32>
flow.return %296 : tensor<1x960x73x73xf32>
}
%238 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%237, %cst_45 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%226 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
flow.return %296 : tensor<1x960x65x65xf32>
}
%239 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%238 : tensor<1x960x65x65xf32>) outs(%228 : tensor<65x65x1x960xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x960xf32>
flow.return %296 : tensor<65x65x1x960xf32>
}
%collapsed_227 = tensor.collapse_shape %239 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32>
%inserted_slice_228 = tensor.insert_slice %collapsed_227 into %231[0, 0, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<68x68x960xf32>
%expanded_229 = tensor.expand_shape %inserted_slice_228 [[0, 1], [2], [3]] output_shape [1, 68, 68, 960] : tensor<68x68x960xf32> into tensor<1x68x68x960xf32>
%240 = flow.dispatch.region -> (tensor<1x68x68x160xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_229, %cst_103 : tensor<1x68x68x960xf32>, tensor<1x1x960x160xf32>) outs(%209 : tensor<1x68x68x160xf32>) -> tensor<1x68x68x160xf32>
flow.return %296 : tensor<1x68x68x160xf32>
}
%extracted_slice_230 = tensor.extract_slice %240[0, 0, 0, 0] [1, 65, 65, 160] [1, 1, 1, 1] : tensor<1x68x68x160xf32> to tensor<65x65x160xf32>
%241 = flow.dispatch.region -> (tensor<68x68x160xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%234, %extracted_slice_230, %cst_47 : tensor<65x65x160xf32>, tensor<65x65x160xf32>, tensor<160xf32>) outs(%207 : tensor<65x65x160xf32>) {
^bb0(%in: f32, %in_257: f32, %in_258: f32, %out: f32):
%297 = arith.addf %in_257, %in_258 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
%306 = arith.extsi %305 : i8 to i32
%307 = arith.sitofp %306 : i32 to f32
%308 = arith.mulf %307, %cst_9 : f32
%309 = arith.addf %in, %308 : f32
%310 = arith.divf %309, %cst_9 : f32
%311 = math.round %310 : f32
%312 = arith.addf %311, %cst_15 : f32
%313 = arith.cmpf ult, %312, %cst_17 : f32
%314 = arith.cmpf ugt, %312, %cst_16 : f32
%315 = arith.select %313, %cst_17, %312 : f32
%316 = arith.select %314, %cst_16, %315 : f32
%317 = arith.fptosi %316 : f32 to i8
%318 = arith.extsi %317 : i8 to i32
%319 = arith.sitofp %318 : i32 to f32
%320 = arith.mulf %319, %cst_9 : f32
linalg.yield %320 : f32
} -> tensor<65x65x160xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %216[0, 0, 0] [65, 65, 160] [1, 1, 1] : tensor<65x65x160xf32> into tensor<68x68x160xf32>
flow.return %inserted_slice_256 : tensor<68x68x160xf32>
}
%expanded_231 = tensor.expand_shape %241 [[0, 1], [2], [3]] output_shape [1, 68, 68, 160] : tensor<68x68x160xf32> into tensor<1x68x68x160xf32>
%242 = flow.dispatch.region -> (tensor<1x68x68x960xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_231, %cst_104 : tensor<1x68x68x160xf32>, tensor<1x1x160x960xf32>) outs(%218 : tensor<1x68x68x960xf32>) -> tensor<1x68x68x960xf32>
flow.return %296 : tensor<1x68x68x960xf32>
}
%extracted_slice_232 = tensor.extract_slice %242[0, 0, 0, 0] [1, 65, 65, 960] [1, 1, 1, 1] : tensor<1x68x68x960xf32> to tensor<65x65x960xf32>
%243 = flow.dispatch.region -> (tensor<73x73x960xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_232, %cst_49 : tensor<65x65x960xf32>, tensor<960xf32>) outs(%214 : tensor<65x65x960xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ult, %297, %cst_15 : f32
%299 = arith.select %298, %cst_15, %297 : f32
%300 = arith.cmpf ugt, %299, %cst_2 : f32
%301 = arith.select %300, %cst_2, %299 : f32
%302 = arith.divf %301, %cst_11 : f32
%303 = math.round %302 : f32
%304 = arith.addf %303, %cst_15 : f32
%305 = arith.cmpf ult, %304, %cst_17 : f32
%306 = arith.cmpf ugt, %304, %cst_16 : f32
%307 = arith.select %305, %cst_17, %304 : f32
%308 = arith.select %306, %cst_16, %307 : f32
%309 = arith.fptosi %308 : f32 to i8
%310 = arith.extsi %309 : i8 to i32
%311 = arith.sitofp %310 : i32 to f32
%312 = arith.mulf %311, %cst_11 : f32
linalg.yield %312 : f32
} -> tensor<65x65x960xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %221[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32>
flow.return %inserted_slice_256 : tensor<73x73x960xf32>
}
%expanded_233 = tensor.expand_shape %243 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32>
%244 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_233 : tensor<73x73x1x960xf32>) outs(%223 : tensor<1x960x73x73xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x960x73x73xf32>
flow.return %296 : tensor<1x960x73x73xf32>
}
%245 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) {
%296 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%244, %cst_48 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%226 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
flow.return %296 : tensor<1x960x65x65xf32>
}
%246 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%245 : tensor<1x960x65x65xf32>) outs(%228 : tensor<65x65x1x960xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.cmpf ult, %in, %cst_15 : f32
%298 = arith.select %297, %cst_15, %in : f32
%299 = arith.cmpf ugt, %298, %cst_2 : f32
%300 = arith.select %299, %cst_2, %298 : f32
%301 = arith.divf %300, %cst_11 : f32
%302 = math.round %301 : f32
%303 = arith.addf %302, %cst_15 : f32
%304 = arith.cmpf ult, %303, %cst_17 : f32
%305 = arith.cmpf ugt, %303, %cst_16 : f32
%306 = arith.select %304, %cst_17, %303 : f32
%307 = arith.select %305, %cst_16, %306 : f32
%308 = arith.fptosi %307 : f32 to i8
%309 = arith.extsi %308 : i8 to i32
%310 = arith.sitofp %309 : i32 to f32
%311 = arith.mulf %310, %cst_11 : f32
linalg.yield %311 : f32
} -> tensor<65x65x1x960xf32>
flow.return %296 : tensor<65x65x1x960xf32>
}
%collapsed_234 = tensor.collapse_shape %246 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32>
%247 = tensor.empty() : tensor<65x65x320xf32>
%inserted_slice_235 = tensor.insert_slice %collapsed_234 into %231[0, 0, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<68x68x960xf32>
%expanded_236 = tensor.expand_shape %inserted_slice_235 [[0, 1], [2], [3]] output_shape [1, 68, 68, 960] : tensor<68x68x960xf32> into tensor<1x68x68x960xf32>
%248 = tensor.empty() : tensor<1x68x68x320xf32>
%249 = linalg.fill ins(%cst_15 : f32) outs(%248 : tensor<1x68x68x320xf32>) -> tensor<1x68x68x320xf32>
%250 = flow.dispatch.region -> (tensor<1x68x68x320xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_236, %cst_105 : tensor<1x68x68x960xf32>, tensor<1x1x960x320xf32>) outs(%249 : tensor<1x68x68x320xf32>) -> tensor<1x68x68x320xf32>
flow.return %296 : tensor<1x68x68x320xf32>
}
%extracted_slice_237 = tensor.extract_slice %250[0, 0, 0, 0] [1, 65, 65, 320] [1, 1, 1, 1] : tensor<1x68x68x320xf32> to tensor<65x65x320xf32>
%251 = tensor.empty() : tensor<65x65x320xi8>
%252 = flow.dispatch.region -> (tensor<65x65x320xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_237, %cst_50 : tensor<65x65x320xf32>, tensor<320xf32>) outs(%251 : tensor<65x65x320xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_9 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<65x65x320xi8>
flow.return %296 : tensor<65x65x320xi8>
}
%253 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%252 : tensor<65x65x320xi8>) outs(%247 : tensor<65x65x320xf32>) {
^bb0(%in: i8, %out: f32):
%296 = arith.extsi %in : i8 to i32
%297 = arith.sitofp %296 : i32 to f32
%298 = arith.mulf %297, %cst_9 : f32
linalg.yield %298 : f32
} -> tensor<65x65x320xf32>
%254 = tensor.empty() : tensor<320x65x65xf32>
%255 = flow.dispatch.region -> (tensor<320x65x65xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%253 : tensor<65x65x320xf32>) outs(%254 : tensor<320x65x65xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<320x65x65xf32>
flow.return %296 : tensor<320x65x65xf32>
}
%expanded_238 = tensor.expand_shape %255 [[0, 1], [2], [3]] output_shape [1, 320, 65, 65] : tensor<320x65x65xf32> into tensor<1x320x65x65xf32>
%256 = tensor.empty() : tensor<1x320x1x1xf32>
%257 = linalg.fill ins(%cst_15 : f32) outs(%256 : tensor<1x320x1x1xf32>) -> tensor<1x320x1x1xf32>
%258 = tensor.empty() : tensor<65x65xf32>
%259 = flow.dispatch.region -> (tensor<1x320x1x1xf32>) {
%296 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<65> : vector<2xi64>} ins(%expanded_238, %258 : tensor<1x320x65x65xf32>, tensor<65x65xf32>) outs(%257 : tensor<1x320x1x1xf32>) -> tensor<1x320x1x1xf32>
flow.return %296 : tensor<1x320x1x1xf32>
}
%collapsed_239 = tensor.collapse_shape %259 [[0, 1, 2, 3]] : tensor<1x320x1x1xf32> into tensor<320xf32>
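    // Image-pooling branch (presumably the ASPP global-pooling path): the 320-channel feature map is transposed to NCHW and reduced with a 65x65 linalg.pooling_nchw_sum (stride 65), giving a per-channel 1x320x1x1 sum; the division that turns this sum into a mean appears to happen later, in the arith.divf by %cst_18 inside %271.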
%260 = tensor.empty() : tensor<65x65x256xf32>
%261 = tensor.empty() : tensor<68x68x320xf32>
%262 = linalg.fill ins(%cst_15 : f32) outs(%261 : tensor<68x68x320xf32>) -> tensor<68x68x320xf32>
%inserted_slice_240 = tensor.insert_slice %253 into %262[0, 0, 0] [65, 65, 320] [1, 1, 1] : tensor<65x65x320xf32> into tensor<68x68x320xf32>
%expanded_241 = tensor.expand_shape %inserted_slice_240 [[0, 1], [2], [3]] output_shape [1, 68, 68, 320] : tensor<68x68x320xf32> into tensor<1x68x68x320xf32>
%263 = tensor.empty() : tensor<1x68x68x256xf32>
%264 = linalg.fill ins(%cst_15 : f32) outs(%263 : tensor<1x68x68x256xf32>) -> tensor<1x68x68x256xf32>
%265 = flow.dispatch.region -> (tensor<1x68x68x256xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_241, %cst_106 : tensor<1x68x68x320xf32>, tensor<1x1x320x256xf32>) outs(%264 : tensor<1x68x68x256xf32>) -> tensor<1x68x68x256xf32>
flow.return %296 : tensor<1x68x68x256xf32>
}
%extracted_slice_242 = tensor.extract_slice %265[0, 0, 0, 0] [1, 65, 65, 256] [1, 1, 1, 1] : tensor<1x68x68x256xf32> to tensor<65x65x256xf32>
%expanded_243 = tensor.expand_shape %collapsed_239 [[0, 1, 2]] output_shape [1, 1, 320] : tensor<320xf32> into tensor<1x1x320xf32>
%266 = tensor.empty() : tensor<1x1x320xf32>
%267 = tensor.empty() : tensor<256x65x65xf32>
%268 = flow.dispatch.region -> (tensor<256x65x65xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_242, %cst_25 : tensor<65x65x256xf32>, tensor<256xf32>) outs(%267 : tensor<256x65x65xf32>) {
^bb0(%in: f32, %in_256: f32, %out: f32):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.cmpf ugt, %297, %cst_15 : f32
%299 = arith.select %298, %297, %cst_15 : f32
%300 = arith.divf %299, %cst_12 : f32
%301 = math.round %300 : f32
%302 = arith.addf %301, %cst_15 : f32
%303 = arith.cmpf ult, %302, %cst_17 : f32
%304 = arith.cmpf ugt, %302, %cst_16 : f32
%305 = arith.select %303, %cst_17, %302 : f32
%306 = arith.select %304, %cst_16, %305 : f32
%307 = arith.fptosi %306 : f32 to i8
%308 = arith.extsi %307 : i8 to i32
%309 = arith.sitofp %308 : i32 to f32
%310 = arith.mulf %309, %cst_12 : f32
linalg.yield %310 : f32
} -> tensor<256x65x65xf32>
flow.return %296 : tensor<256x65x65xf32>
}
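    // The 320 -> 256 pointwise conv result gets its bias (%cst_25) plus a ReLU (select against %cst_15, presumably 0.0), is requantized at scale %cst_12, and is transposed to CHW layout, all in the single generic producing %268.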
%269 = tensor.empty() : tensor<4x4x320xf32>
%270 = linalg.fill ins(%cst_15 : f32) outs(%269 : tensor<4x4x320xf32>) -> tensor<4x4x320xf32>
%271 = flow.dispatch.region -> (tensor<4x4x320xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_243 : tensor<1x1x320xf32>) outs(%266 : tensor<1x1x320xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.divf %in, %cst_18 : f32
%298 = arith.mulf %297, %cst_1 : f32
%299 = arith.divf %298, %cst_9 : f32
%300 = math.round %299 : f32
%301 = arith.addf %300, %cst_15 : f32
%302 = arith.cmpf ult, %301, %cst_17 : f32
%303 = arith.cmpf ugt, %301, %cst_16 : f32
%304 = arith.select %302, %cst_17, %301 : f32
%305 = arith.select %303, %cst_16, %304 : f32
%306 = arith.fptosi %305 : f32 to i8
%307 = arith.extsi %306 : i8 to i32
%308 = arith.sitofp %307 : i32 to f32
%309 = arith.mulf %308, %cst_9 : f32
linalg.yield %309 : f32
} -> tensor<1x1x320xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %270[0, 0, 0] [1, 1, 320] [1, 1, 1] : tensor<1x1x320xf32> into tensor<4x4x320xf32>
flow.return %inserted_slice_256 : tensor<4x4x320xf32>
}
%expanded_244 = tensor.expand_shape %271 [[0, 1], [2], [3]] output_shape [1, 4, 4, 320] : tensor<4x4x320xf32> into tensor<1x4x4x320xf32>
%272 = tensor.empty() : tensor<1x4x4x256xf32>
%273 = linalg.fill ins(%cst_15 : f32) outs(%272 : tensor<1x4x4x256xf32>) -> tensor<1x4x4x256xf32>
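  // 1x1 conv projecting the image-pooling branch from 320 to 256 channels on the padded 4x4 tile; only the [0, 0] spatial position carries real data.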
%274 = flow.dispatch.region -> (tensor<1x4x4x256xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_244, %cst_107 : tensor<1x4x4x320xf32>, tensor<1x1x320x256xf32>) outs(%273 : tensor<1x4x4x256xf32>) -> tensor<1x4x4x256xf32>
flow.return %296 : tensor<1x4x4x256xf32>
}
%extracted_slice_245 = tensor.extract_slice %274[0, 0, 0, 0] [1, 1, 1, 256] [1, 1, 1, 1] : tensor<1x4x4x256xf32> to tensor<256xf32>
%expanded_246 = tensor.expand_shape %extracted_slice_245 [[0, 1, 2, 3]] output_shape [1, 256, 1, 1] : tensor<256xf32> into tensor<1x256x1x1xf32>
%275 = tensor.empty() : tensor<1x256x1x1xf32>
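  // Bias add, clamp against %cst_15, and fake-quant round trip on the pooled 1x256x1x1 feature.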
%276 = flow.dispatch.region -> (tensor<1x256x1x1xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_246, %cst_0 : tensor<1x256x1x1xf32>, tensor<1x256x1x1xf32>) outs(%275 : tensor<1x256x1x1xf32>) {
^bb0(%in: f32, %in_256: f32, %out: f32):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.cmpf ugt, %297, %cst_15 : f32
%299 = arith.select %298, %297, %cst_15 : f32
%300 = arith.divf %299, %cst_12 : f32
%301 = math.round %300 : f32
%302 = arith.addf %301, %cst_15 : f32
%303 = arith.cmpf ult, %302, %cst_17 : f32
%304 = arith.cmpf ugt, %302, %cst_16 : f32
%305 = arith.select %303, %cst_17, %302 : f32
%306 = arith.select %304, %cst_16, %305 : f32
%307 = arith.fptosi %306 : f32 to i8
%308 = arith.extsi %307 : i8 to i32
%309 = arith.sitofp %308 : i32 to f32
%310 = arith.mulf %309, %cst_12 : f32
linalg.yield %310 : f32
} -> tensor<1x256x1x1xf32>
flow.return %296 : tensor<1x256x1x1xf32>
}
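  // Bilinear-resize style gather that upsamples the 1x256x1x1 pooled feature to 256x65x65; with a source extent of 1 all four taps hit the same element, so this is effectively a broadcast, followed by another fake-quant round trip.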
%277 = flow.dispatch.region -> (tensor<256x65x65xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} outs(%267 : tensor<256x65x65xf32>) {
^bb0(%out: f32):
%297 = linalg.index 0 : index
%298 = linalg.index 1 : index
%299 = linalg.index 2 : index
%300 = arith.index_cast %298 : index to i64
%301 = arith.sitofp %300 : i64 to f32
%302 = arith.addf %301, %cst_13 : f32
%303 = arith.divf %302, %cst_7 : f32
%304 = arith.subf %303, %cst_13 : f32
%305 = arith.maximumf %304, %cst_15 : f32
%306 = arith.minimumf %305, %cst_5 : f32
%307 = arith.minimumf %305, %cst_15 : f32
%308 = math.floor %306 : f32
%309 = arith.addf %306, %cst_14 : f32
%310 = math.floor %309 : f32
%311 = arith.fptosi %308 : f32 to i64
%312 = arith.index_cast %311 : i64 to index
%313 = arith.fptosi %310 : f32 to i64
%314 = arith.index_cast %313 : i64 to index
%315 = arith.index_cast %299 : index to i64
%316 = arith.sitofp %315 : i64 to f32
%317 = arith.addf %316, %cst_13 : f32
%318 = arith.divf %317, %cst_7 : f32
%319 = arith.subf %318, %cst_13 : f32
%320 = arith.maximumf %319, %cst_15 : f32
%321 = arith.minimumf %320, %cst_5 : f32
%322 = arith.minimumf %320, %cst_15 : f32
%323 = math.floor %321 : f32
%324 = arith.addf %321, %cst_14 : f32
%325 = math.floor %324 : f32
%326 = arith.fptosi %323 : f32 to i64
%327 = arith.index_cast %326 : i64 to index
%328 = arith.fptosi %325 : f32 to i64
%329 = arith.index_cast %328 : i64 to index
%extracted = tensor.extract %276[%c0, %297, %312, %327] : tensor<1x256x1x1xf32>
%extracted_256 = tensor.extract %276[%c0, %297, %312, %329] : tensor<1x256x1x1xf32>
%extracted_257 = tensor.extract %276[%c0, %297, %314, %327] : tensor<1x256x1x1xf32>
%extracted_258 = tensor.extract %276[%c0, %297, %314, %329] : tensor<1x256x1x1xf32>
%330 = arith.subf %310, %307 : f32
%331 = arith.subf %307, %308 : f32
%332 = arith.subf %325, %322 : f32
%333 = arith.subf %322, %323 : f32
%334 = arith.mulf %332, %extracted : f32
%335 = arith.mulf %333, %extracted_256 : f32
%336 = arith.addf %334, %335 : f32
%337 = arith.mulf %330, %336 : f32
%338 = arith.mulf %332, %extracted_257 : f32
%339 = arith.mulf %333, %extracted_258 : f32
%340 = arith.addf %338, %339 : f32
%341 = arith.mulf %331, %340 : f32
%342 = arith.addf %337, %341 : f32
%343 = arith.divf %342, %cst_12 : f32
%344 = math.round %343 : f32
%345 = arith.addf %344, %cst_15 : f32
%346 = arith.cmpf ult, %345, %cst_17 : f32
%347 = arith.cmpf ugt, %345, %cst_16 : f32
%348 = arith.select %346, %cst_17, %345 : f32
%349 = arith.select %347, %cst_16, %348 : f32
%350 = arith.fptosi %349 : f32 to i8
%351 = arith.extsi %350 : i8 to i32
%352 = arith.sitofp %351 : i32 to f32
%353 = arith.mulf %352, %cst_12 : f32
linalg.yield %353 : f32
} -> tensor<256x65x65xf32>
flow.return %296 : tensor<256x65x65xf32>
}
%278 = tensor.empty() : tensor<1x512x65x65xf32>
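  // Channel-concatenate the upsampled image-pooling branch and the 1x1 conv branch into 1x512x65x65 via two insert_slices; the next region transposes to 65x65x1x512 with a fake-quant round trip.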
%inserted_slice_247 = tensor.insert_slice %277 into %278[0, 0, 0, 0] [1, 256, 65, 65] [1, 1, 1, 1] : tensor<256x65x65xf32> into tensor<1x512x65x65xf32>
%inserted_slice_248 = tensor.insert_slice %268 into %inserted_slice_247[0, 256, 0, 0] [1, 256, 65, 65] [1, 1, 1, 1] : tensor<256x65x65xf32> into tensor<1x512x65x65xf32>
%279 = tensor.empty() : tensor<65x65x1x512xf32>
%280 = flow.dispatch.region -> (tensor<65x65x1x512xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%inserted_slice_248 : tensor<1x512x65x65xf32>) outs(%279 : tensor<65x65x1x512xf32>) {
^bb0(%in: f32, %out: f32):
%297 = arith.divf %in, %cst_12 : f32
%298 = math.round %297 : f32
%299 = arith.addf %298, %cst_15 : f32
%300 = arith.cmpf ult, %299, %cst_17 : f32
%301 = arith.cmpf ugt, %299, %cst_16 : f32
%302 = arith.select %300, %cst_17, %299 : f32
%303 = arith.select %301, %cst_16, %302 : f32
%304 = arith.fptosi %303 : f32 to i8
%305 = arith.extsi %304 : i8 to i32
%306 = arith.sitofp %305 : i32 to f32
%307 = arith.mulf %306, %cst_12 : f32
linalg.yield %307 : f32
} -> tensor<65x65x1x512xf32>
flow.return %296 : tensor<65x65x1x512xf32>
}
%collapsed_249 = tensor.collapse_shape %280 [[0], [1], [2, 3]] : tensor<65x65x1x512xf32> into tensor<65x65x512xf32>
%281 = tensor.empty() : tensor<68x68x512xf32>
%282 = linalg.fill ins(%cst_15 : f32) outs(%281 : tensor<68x68x512xf32>) -> tensor<68x68x512xf32>
%inserted_slice_250 = tensor.insert_slice %collapsed_249 into %282[0, 0, 0] [65, 65, 512] [1, 1, 1] : tensor<65x65x512xf32> into tensor<68x68x512xf32>
%expanded_251 = tensor.expand_shape %inserted_slice_250 [[0, 1], [2], [3]] output_shape [1, 68, 68, 512] : tensor<68x68x512xf32> into tensor<1x68x68x512xf32>
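  // 1x1 projection conv over the concatenated (and zero-padded) features: 512 -> 256 channels; the valid 65x65 region is sliced out below.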
%283 = flow.dispatch.region -> (tensor<1x68x68x256xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_251, %cst_108 : tensor<1x68x68x512xf32>, tensor<1x1x512x256xf32>) outs(%264 : tensor<1x68x68x256xf32>) -> tensor<1x68x68x256xf32>
flow.return %296 : tensor<1x68x68x256xf32>
}
%extracted_slice_252 = tensor.extract_slice %283[0, 0, 0, 0] [1, 65, 65, 256] [1, 1, 1, 1] : tensor<1x68x68x256xf32> to tensor<65x65x256xf32>
%284 = tensor.empty() : tensor<68x68x256xf32>
%285 = linalg.fill ins(%cst_15 : f32) outs(%284 : tensor<68x68x256xf32>) -> tensor<68x68x256xf32>
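  // Bias add, clamp against %cst_15, and fake-quant round trip on the projected features, then re-pad to 68x68x256 for the classifier conv.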
%286 = flow.dispatch.region -> (tensor<68x68x256xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_252, %cst_72 : tensor<65x65x256xf32>, tensor<256xf32>) outs(%260 : tensor<65x65x256xf32>) {
^bb0(%in: f32, %in_257: f32, %out: f32):
%297 = arith.addf %in, %in_257 : f32
%298 = arith.cmpf ugt, %297, %cst_15 : f32
%299 = arith.select %298, %297, %cst_15 : f32
%300 = arith.divf %299, %cst_11 : f32
%301 = math.round %300 : f32
%302 = arith.addf %301, %cst_15 : f32
%303 = arith.cmpf ult, %302, %cst_17 : f32
%304 = arith.cmpf ugt, %302, %cst_16 : f32
%305 = arith.select %303, %cst_17, %302 : f32
%306 = arith.select %304, %cst_16, %305 : f32
%307 = arith.fptosi %306 : f32 to i8
%308 = arith.extsi %307 : i8 to i32
%309 = arith.sitofp %308 : i32 to f32
%310 = arith.mulf %309, %cst_11 : f32
linalg.yield %310 : f32
} -> tensor<65x65x256xf32>
%inserted_slice_256 = tensor.insert_slice %296 into %285[0, 0, 0] [65, 65, 256] [1, 1, 1] : tensor<65x65x256xf32> into tensor<68x68x256xf32>
flow.return %inserted_slice_256 : tensor<68x68x256xf32>
}
%expanded_253 = tensor.expand_shape %286 [[0, 1], [2], [3]] output_shape [1, 68, 68, 256] : tensor<68x68x256xf32> into tensor<1x68x68x256xf32>
%287 = tensor.empty() : tensor<1x68x68x24xf32>
%288 = linalg.fill ins(%cst_15 : f32) outs(%287 : tensor<1x68x68x24xf32>) -> tensor<1x68x68x24xf32>
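  // Final 1x1 classifier conv; the output channel count appears padded from 21 classes to 24, and the valid 21 channels are sliced back out below.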
%289 = flow.dispatch.region -> (tensor<1x68x68x24xf32>) {
%296 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_253, %cst_3 : tensor<1x68x68x256xf32>, tensor<1x1x256x24xf32>) outs(%288 : tensor<1x68x68x24xf32>) -> tensor<1x68x68x24xf32>
flow.return %296 : tensor<1x68x68x24xf32>
}
%extracted_slice_254 = tensor.extract_slice %289[0, 0, 0, 0] [1, 65, 65, 21] [1, 1, 1, 1] : tensor<1x68x68x24xf32> to tensor<65x65x21xf32>
%expanded_255 = tensor.expand_shape %extracted_slice_254 [[0], [1], [2, 3]] output_shape [65, 65, 1, 21] : tensor<65x65x21xf32> into tensor<65x65x1x21xf32>
%290 = tensor.empty() : tensor<1x21x65x65xi8>
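  // Add the classifier bias, quantize to i8, and transpose to NCHW, giving 1x21x65x65 quantized logits.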
%291 = flow.dispatch.region -> (tensor<1x21x65x65xi8>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_255, %cst : tensor<65x65x1x21xf32>, tensor<1x21xf32>) outs(%290 : tensor<1x21x65x65xi8>) {
^bb0(%in: f32, %in_256: f32, %out: i8):
%297 = arith.addf %in, %in_256 : f32
%298 = arith.divf %297, %cst_10 : f32
%299 = math.round %298 : f32
%300 = arith.addf %299, %cst_15 : f32
%301 = arith.cmpf ult, %300, %cst_17 : f32
%302 = arith.cmpf ugt, %300, %cst_16 : f32
%303 = arith.select %301, %cst_17, %300 : f32
%304 = arith.select %302, %cst_16, %303 : f32
%305 = arith.fptosi %304 : f32 to i8
linalg.yield %305 : i8
} -> tensor<1x21x65x65xi8>
flow.return %296 : tensor<1x21x65x65xi8>
}
%292 = tensor.empty() : tensor<1x513x513x21xf32>
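  // Bilinear upsampling of the 65x65 logits to the 513x513 input resolution: each of the four taps is dequantized from i8, interpolated, and the result passes through two back-to-back fake-quant round trips.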
%293 = flow.dispatch.region -> (tensor<1x513x513x21xf32>) {
%296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%292 : tensor<1x513x513x21xf32>) {
^bb0(%out: f32):
%297 = linalg.index 0 : index
%298 = linalg.index 1 : index
%299 = linalg.index 2 : index
%300 = linalg.index 3 : index
%301 = affine.apply affine_map<(d0, d1) -> (d0 + d1 * 513)>(%298, %297)
%302 = arith.index_cast %301 : index to i64
%303 = arith.sitofp %302 : i64 to f32
%304 = arith.addf %303, %cst_13 : f32
%305 = arith.divf %304, %cst_6 : f32
%306 = arith.subf %305, %cst_13 : f32
%307 = arith.maximumf %306, %cst_15 : f32
%308 = arith.minimumf %307, %cst_4 : f32
%309 = math.floor %308 : f32
%310 = arith.addf %308, %cst_14 : f32
%311 = math.floor %310 : f32
%312 = arith.fptosi %309 : f32 to i64
%313 = arith.index_cast %312 : i64 to index
%314 = arith.fptosi %311 : f32 to i64
%315 = arith.index_cast %314 : i64 to index
%316 = arith.index_cast %299 : index to i64
%317 = arith.sitofp %316 : i64 to f32
%318 = arith.addf %317, %cst_13 : f32
%319 = arith.divf %318, %cst_6 : f32
%320 = arith.subf %319, %cst_13 : f32
%321 = arith.maximumf %320, %cst_15 : f32
%322 = arith.minimumf %321, %cst_4 : f32
%323 = math.floor %322 : f32
%324 = arith.addf %322, %cst_14 : f32
%325 = math.floor %324 : f32
%326 = arith.fptosi %323 : f32 to i64
%327 = arith.index_cast %326 : i64 to index
%328 = arith.fptosi %325 : f32 to i64
%329 = arith.index_cast %328 : i64 to index
%extracted = tensor.extract %291[%c0, %300, %313, %327] : tensor<1x21x65x65xi8>
%330 = arith.extsi %extracted : i8 to i32
%331 = arith.sitofp %330 : i32 to f32
%332 = arith.mulf %331, %cst_10 : f32
%extracted_256 = tensor.extract %291[%c0, %300, %313, %329] : tensor<1x21x65x65xi8>
%333 = arith.extsi %extracted_256 : i8 to i32
%334 = arith.sitofp %333 : i32 to f32
%335 = arith.mulf %334, %cst_10 : f32
%extracted_257 = tensor.extract %291[%c0, %300, %315, %327] : tensor<1x21x65x65xi8>
%336 = arith.extsi %extracted_257 : i8 to i32
%337 = arith.sitofp %336 : i32 to f32
%338 = arith.mulf %337, %cst_10 : f32
%extracted_258 = tensor.extract %291[%c0, %300, %315, %329] : tensor<1x21x65x65xi8>
%339 = arith.extsi %extracted_258 : i8 to i32
%340 = arith.sitofp %339 : i32 to f32
%341 = arith.mulf %340, %cst_10 : f32
%342 = arith.subf %311, %308 : f32
%343 = arith.subf %308, %309 : f32
%344 = arith.subf %325, %322 : f32
%345 = arith.subf %322, %323 : f32
%346 = arith.mulf %344, %332 : f32
%347 = arith.mulf %345, %335 : f32
%348 = arith.addf %346, %347 : f32
%349 = arith.mulf %342, %348 : f32
%350 = arith.mulf %344, %338 : f32
%351 = arith.mulf %345, %341 : f32
%352 = arith.addf %350, %351 : f32
%353 = arith.mulf %343, %352 : f32
%354 = arith.addf %349, %353 : f32
%355 = arith.divf %354, %cst_10 : f32
%356 = math.round %355 : f32
%357 = arith.addf %356, %cst_15 : f32
%358 = arith.cmpf ult, %357, %cst_17 : f32
%359 = arith.cmpf ugt, %357, %cst_16 : f32
%360 = arith.select %358, %cst_17, %357 : f32
%361 = arith.select %359, %cst_16, %360 : f32
%362 = arith.fptosi %361 : f32 to i8
%363 = arith.extsi %362 : i8 to i32
%364 = arith.sitofp %363 : i32 to f32
%365 = arith.mulf %364, %cst_10 : f32
%366 = arith.divf %365, %cst_10 : f32
%367 = math.round %366 : f32
%368 = arith.addf %367, %cst_15 : f32
%369 = arith.cmpf ult, %368, %cst_17 : f32
%370 = arith.cmpf ugt, %368, %cst_16 : f32
%371 = arith.select %369, %cst_17, %368 : f32
%372 = arith.select %370, %cst_16, %371 : f32
%373 = arith.fptosi %372 : f32 to i8
%374 = arith.extsi %373 : i8 to i32
%375 = arith.sitofp %374 : i32 to f32
%376 = arith.mulf %375, %cst_10 : f32
linalg.yield %376 : f32
} -> tensor<1x513x513x21xf32>
flow.return %296 : tensor<1x513x513x21xf32>
}
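  // Barrier on the output fence and export of the 1x513x513x21 logits as the function result.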
%294 = hal.tensor.barrier join(%293 : tensor<1x513x513x21xf32>) => %arg2 : !hal.fence
%295 = hal.tensor.export %294 : tensor<1x513x513x21xf32> -> !hal.buffer_view
util.return %295 : !hal.buffer_view
}