Skip to content

Instantly share code, notes, and snippets.

@yzhang93
Created July 9, 2024 00:04
Show Gist options
  • Save yzhang93/456640440608e48550308bf87245523c to your computer and use it in GitHub Desktop.
deeplab_dispatches
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<576xi8> -> tensor<576xf32>. Each element is
// sign-extended to i32, converted to f32, then multiplied by 1.562500e-02
// (= 1/64) — presumably a quantization scale; confirm against the model.
util.func public @jit_eval_0(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_0(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
// Uninitialized destination; fully overwritten by the generic below.
%1 = tensor.empty() : tensor<576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<192xi8> -> tensor<192xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_3(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_3(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
%1 = tensor.empty() : tensor<192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<96x3x3xi8> -> tensor<96x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_10(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_10(%input0: tensor<96x3x3xi8>) -> (%output0: tensor<96x3x3xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x3x3xi8>
%1 = tensor.empty() : tensor<96x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<96x3x3xi8>) outs(%1 : tensor<96x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<144xi8> -> tensor<144xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_2(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_2(%input0: tensor<144xi8>) -> (%output0: tensor<144xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144xi8>
%1 = tensor.empty() : tensor<144xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<144xi8>) outs(%1 : tensor<144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<960xi8> -> tensor<960xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_1(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_1(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
%1 = tensor.empty() : tensor<960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<384xi8> -> tensor<384xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_4(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_4(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<32xi8> -> tensor<32xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_6(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_6(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Dequantize with layout permutation: tensor<32x3x3x3xi8> -> tensor<3x3x3x32xf32>.
// The input map (d3, d2, d0, d1) means out[d0,d1,d2,d3] = dequant(in[d3,d2,d0,d1]);
// each element is sign-extended, converted to f32, and scaled by 3.125000e-02 (= 1/32).
// NOTE(review): looks like a conv-filter layout change (e.g. OIHW -> HWIO) — confirm.
util.func public @jit_eval_7(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_7(%input0: tensor<32x3x3x3xi8>) -> (%output0: tensor<3x3x3x32xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x3x3x3xi8>
%1 = tensor.empty() : tensor<3x3x3x32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d2, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x3x3x3xi8>) outs(%1 : tensor<3x3x3x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<3x3x3x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<3x3x3x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<96xi8> -> tensor<96xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<256xi8> -> tensor<256xf32>; sign-extend,
// int->float, then scale by 9.765625E-4 (= 1/1024).
util.func public @jit_eval_5(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_5(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
%1 = tensor.empty() : tensor<256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<16xi8> -> tensor<16xf32>; sign-extend,
// int->float, then scale by 2.500000e-01 (= 1/4).
util.func public @jit_eval_9(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_9(%input0: tensor<16xi8>) -> (%output0: tensor<16xf32>)"}} {
%cst = arith.constant 2.500000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16xi8>
%1 = tensor.empty() : tensor<16xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<16xi8>) outs(%1 : tensor<16xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<16xf32>
%3 = hal.tensor.export %2 "output0" : tensor<16xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<24xi8> -> tensor<24xf32>; sign-extend,
// int->float, then scale by 5.000000e-01 (= 1/2).
util.func public @jit_eval_11(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_11(%input0: tensor<24xi8>) -> (%output0: tensor<24xf32>)"}} {
%cst = arith.constant 5.000000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24xi8>
%1 = tensor.empty() : tensor<24xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<24xi8>) outs(%1 : tensor<24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<32x3x3xi8> -> tensor<32x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 5.000000e-01 (= 1/2).
util.func public @jit_eval_8(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_8(%input0: tensor<32x3x3xi8>) -> (%output0: tensor<32x3x3xf32>)"}} {
%cst = arith.constant 5.000000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x3x3xi8>
%1 = tensor.empty() : tensor<32x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<32x3x3xi8>) outs(%1 : tensor<32x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<384x3x3xi8> -> tensor<384x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_12(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_12(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<384xi8> -> tensor<384xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_13(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_13(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<576xi8> -> tensor<576xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_18(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_18(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
%1 = tensor.empty() : tensor<576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<576xi8> -> tensor<576xf32>; sign-extend,
// int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_21(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_21(%input0: tensor<576xi8>) -> (%output0: tensor<576xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576xi8>
%1 = tensor.empty() : tensor<576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<576xi8>) outs(%1 : tensor<576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<96xi8> -> tensor<96xf32>; sign-extend,
// int->float, then scale by 1.250000e-01 (= 1/8).
util.func public @jit_eval_14(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_14(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<960x3x3xi8> -> tensor<960x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_23(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_23(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
%1 = tensor.empty() : tensor<960x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<576x3x3xi8> -> tensor<576x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_15(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_15(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
%1 = tensor.empty() : tensor<576x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<96xi8> -> tensor<96xf32>; sign-extend,
// int->float, then scale by 1.250000e-01 (= 1/8).
util.func public @jit_eval_19(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_19(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<960xi8> -> tensor<960xf32>; sign-extend,
// int->float, then scale by 1.562500e-02 (= 1/64).
util.func public @jit_eval_26(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_26(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
%1 = tensor.empty() : tensor<960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<160xi8> -> tensor<160xf32>; sign-extend,
// int->float, then scale by 1.250000e-01 (= 1/8).
util.func public @jit_eval_27(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_27(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
%1 = tensor.empty() : tensor<160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<576x3x3xi8> -> tensor<576x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_17(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_17(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
%1 = tensor.empty() : tensor<576x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<96xi8> -> tensor<96xf32>; sign-extend,
// int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_16(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_16(%input0: tensor<96xi8>) -> (%output0: tensor<96xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96xi8>
%1 = tensor.empty() : tensor<96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<96xi8>) outs(%1 : tensor<96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<960x3x3xi8> -> tensor<960x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 1.250000e-01 (= 1/8).
util.func public @jit_eval_28(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_28(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
%1 = tensor.empty() : tensor<960x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<160xi8> -> tensor<160xf32>; sign-extend,
// int->float, then scale by 1.250000e-01 (= 1/8).
util.func public @jit_eval_22(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_22(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
%1 = tensor.empty() : tensor<160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<576x3x3xi8> -> tensor<576x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_20(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_20(%input0: tensor<576x3x3xi8>) -> (%output0: tensor<576x3x3xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x3x3xi8>
%1 = tensor.empty() : tensor<576x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<576x3x3xi8>) outs(%1 : tensor<576x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<576x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<576x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<960x3x3xi8> -> tensor<960x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_25(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_25(%input0: tensor<960x3x3xi8>) -> (%output0: tensor<960x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x3x3xi8>
%1 = tensor.empty() : tensor<960x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<960x3x3xi8>) outs(%1 : tensor<960x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<160xi8> -> tensor<160xf32>; sign-extend,
// int->float, then scale by 6.250000e-02 (= 1/16).
util.func public @jit_eval_24(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_24(%input0: tensor<160xi8>) -> (%output0: tensor<160xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160xi8>
%1 = tensor.empty() : tensor<160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<160xi8>) outs(%1 : tensor<160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<144x3x3xi8> -> tensor<144x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 1.250000e-01 (= 1/8).
util.func public @jit_eval_31(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_31(%input0: tensor<144x3x3xi8>) -> (%output0: tensor<144x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x3x3xi8>
%1 = tensor.empty() : tensor<144x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<144x3x3xi8>) outs(%1 : tensor<144x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<960xi8> -> tensor<960xf32>; sign-extend,
// int->float, then scale by 1.562500e-02 (= 1/64).
util.func public @jit_eval_29(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_29(%input0: tensor<960xi8>) -> (%output0: tensor<960xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960xi8>
%1 = tensor.empty() : tensor<960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<960xi8>) outs(%1 : tensor<960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Elementwise dequantize: tensor<144x3x3xi8> -> tensor<144x3x3xf32> (identity
// layout); sign-extend, int->float, then scale by 3.125000e-02 (= 1/32).
util.func public @jit_eval_33(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_33(%input0: tensor<144x3x3xi8>) -> (%output0: tensor<144x3x3xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x3x3xi8>
%1 = tensor.empty() : tensor<144x3x3xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<144x3x3xi8>) outs(%1 : tensor<144x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<144xi8>, scaling each value by 3.125e-02 (2^-5).
util.func public @jit_eval_34(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_34(%input0: tensor<144xi8>) -> (%output0: tensor<144xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144xi8>
%1 = tensor.empty() : tensor<144xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<144xi8>) outs(%1 : tensor<144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<192xi8>, scaling each value by 3.125e-02 (2^-5).
util.func public @jit_eval_39(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_39(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
%1 = tensor.empty() : tensor<192xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<192x3x3xi8>, scaling each value by 1.5625e-02 (2^-6).
util.func public @jit_eval_41(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_41(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8>
%1 = tensor.empty() : tensor<192x3x3xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<32xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_40(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_40(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<320xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_30(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_30(%input0: tensor<320xi8>) -> (%output0: tensor<320xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<320xi8>
%1 = tensor.empty() : tensor<320xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<320xi8>) outs(%1 : tensor<320xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<320xf32>
%3 = hal.tensor.export %2 "output0" : tensor<320xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<24xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_32(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_32(%input0: tensor<24xi8>) -> (%output0: tensor<24xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24xi8>
%1 = tensor.empty() : tensor<24xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<24xi8>) outs(%1 : tensor<24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<32xi8>, scaling each value by 2.5e-01 (2^-2).
util.func public @jit_eval_35(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_35(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 2.500000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<192x3x3xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_36(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_36(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8>
%1 = tensor.empty() : tensor<192x3x3xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<192x3x3xi8>, scaling each value by 6.25e-02 (2^-4).
util.func public @jit_eval_38(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_38(%input0: tensor<192x3x3xi8>) -> (%output0: tensor<192x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x3x3xi8>
%1 = tensor.empty() : tensor<192x3x3xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<192x3x3xi8>) outs(%1 : tensor<192x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<32xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_37(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_37(%input0: tensor<32xi8>) -> (%output0: tensor<32xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32xi8>
%1 = tensor.empty() : tensor<32xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<32xi8>) outs(%1 : tensor<32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<64xi8>, scaling each value by 6.25e-02 (2^-4).
util.func public @jit_eval_45(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_45(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<384x3x3xi8>, scaling each value by 6.25e-02 (2^-4).
util.func public @jit_eval_46(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_46(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<64xi8>, scaling each value by 2.5e-01 (2^-2).
util.func public @jit_eval_43(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_43(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 2.500000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<192xi8>, scaling each value by 3.125e-02 (2^-5).
util.func public @jit_eval_42(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_42(%input0: tensor<192xi8>) -> (%output0: tensor<192xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192xi8>
%1 = tensor.empty() : tensor<192xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<192xi8>) outs(%1 : tensor<192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<384xi8>, scaling each value by 1.5625e-02 (2^-6).
util.func public @jit_eval_47(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_47(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<64xi8>, scaling each value by 6.25e-02 (2^-4).
util.func public @jit_eval_48(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_48(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<384x3x3xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_44(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_44(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<384x3x3xi8>, scaling each value by 6.25e-02 (2^-4).
util.func public @jit_eval_49(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_49(%input0: tensor<384x3x3xi8>) -> (%output0: tensor<384x3x3xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x3x3xi8>
%1 = tensor.empty() : tensor<384x3x3xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<384x3x3xi8>) outs(%1 : tensor<384x3x3xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384x3x3xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384x3x3xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<64xi8>, scaling each value by 1.25e-01 (2^-3).
util.func public @jit_eval_51(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_51(%input0: tensor<64xi8>) -> (%output0: tensor<64xf32>)"}} {
%cst = arith.constant 1.250000e-01 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64xi8>
%1 = tensor.empty() : tensor<64xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<64xi8>) outs(%1 : tensor<64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<256xi8>, scaling each value by 1.5625e-02 (2^-6).
util.func public @jit_eval_52(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_52(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
%1 = tensor.empty() : tensor<256xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-8) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<144x1x1x24xi8> -> tensor<1x1x24x144xf32>.
util.func public @jit_eval_58(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_58(%input0: tensor<144x1x1x24xi8>) -> (%output0: tensor<1x1x24x144xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x1x1x24xi8>
%1 = tensor.empty() : tensor<1x1x24x144xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<144x1x1x24xi8>) outs(%1 : tensor<1x1x24x144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x24x144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x24x144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<384xi8>, scaling each value by 1.5625e-02 (2^-6).
util.func public @jit_eval_50(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_50(%input0: tensor<384xi8>) -> (%output0: tensor<384xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384xi8>
%1 = tensor.empty() : tensor<384xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<384xi8>) outs(%1 : tensor<384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-8) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<96x1x1x16xi8> -> tensor<1x1x16x96xf32>.
util.func public @jit_eval_56(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_56(%input0: tensor<96x1x1x16xi8>) -> (%output0: tensor<1x1x16x96xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x16xi8>
%1 = tensor.empty() : tensor<1x1x16x96xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x16xi8>) outs(%1 : tensor<1x1x16x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x16x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x16x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-8) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<144x1x1x24xi8> -> tensor<1x1x24x144xf32>.
util.func public @jit_eval_60(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_60(%input0: tensor<144x1x1x24xi8>) -> (%output0: tensor<1x1x24x144xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<144x1x1x24xi8>
%1 = tensor.empty() : tensor<1x1x24x144xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<144x1x1x24xi8>) outs(%1 : tensor<1x1x24x144xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x24x144xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x24x144xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-9) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<192x1x1x32xi8> -> tensor<1x1x32x192xf32>.
util.func public @jit_eval_62(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_62(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x192xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-4) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<16x1x1x32xi8> -> tensor<1x1x32x16xf32>.
util.func public @jit_eval_55(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_55(%input0: tensor<16x1x1x32xi8>) -> (%output0: tensor<1x1x32x16xf32>)"}} {
%cst = arith.constant 6.250000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x16xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<16x1x1x32xi8>) outs(%1 : tensor<1x1x32x16xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x16xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x16xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-6) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<32x1x1x144xi8> -> tensor<1x1x144x32xf32>.
util.func public @jit_eval_61(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_61(%input0: tensor<32x1x1x144xi8>) -> (%output0: tensor<1x1x144x32xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x144xi8>
%1 = tensor.empty() : tensor<1x1x144x32xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x144xi8>) outs(%1 : tensor<1x1x144x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x144x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x144x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<21xi8>, scaling each value by 1.5625e-02 (2^-6).
util.func public @jit_eval_54(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_54(%input0: tensor<21xi8>) -> (%output0: tensor<21xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<21xi8>
%1 = tensor.empty() : tensor<21xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<21xi8>) outs(%1 : tensor<21xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<21xf32>
%3 = hal.tensor.export %2 "output0" : tensor<21xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dequantization dispatch: element-wise i8 -> f32 conversion
// of a tensor<256xi8>, scaling each value by 0.001953125 (2^-9).
util.func public @jit_eval_53(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_53(%input0: tensor<256xi8>) -> (%output0: tensor<256xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256xi8>
%1 = tensor.empty() : tensor<256xf32>
// Per element: sign-extend i8 -> i32, convert to f32, multiply by the scale.
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<256xi8>) outs(%1 : tensor<256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-10) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<192x1x1x32xi8> -> tensor<1x1x32x192xf32>.
util.func public @jit_eval_64(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_64(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x192xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-6) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<32x1x1x192xi8> -> tensor<1x1x192x32xf32>.
util.func public @jit_eval_63(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_63(%input0: tensor<32x1x1x192xi8>) -> (%output0: tensor<1x1x192x32xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x192xi8>
%1 = tensor.empty() : tensor<1x1x192x32xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x192xi8>) outs(%1 : tensor<1x1x192x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x192x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
// Constant-folded dispatch: dequantize (i8 -> f32, scale 2^-5) fused with a
// transpose of the leading dim to the back: in[d,a,b,c] -> out[a,b,c,d],
// i.e. tensor<24x1x1x144xi8> -> tensor<1x1x144x24xf32>.
util.func public @jit_eval_59(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_59(%input0: tensor<24x1x1x144xi8>) -> (%output0: tensor<1x1x144x24xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24x1x1x144xi8>
%1 = tensor.empty() : tensor<1x1x144x24xf32>
// The permuted input map (d3, d0, d1, d2) performs the transpose in-flight.
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<24x1x1x144xi8>) outs(%1 : tensor<1x1x144x24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x144x24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x144x24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_57(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_57(%input0: tensor<24x1x1x96xi8>) -> (%output0: tensor<1x1x96x24xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<24x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x24xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<24x1x1x96xi8>) outs(%1 : tensor<1x1x96x24xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x24xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x24xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_67(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_67(%input0: tensor<64x1x1x192xi8>) -> (%output0: tensor<1x1x192x64xf32>)"}} {
%cst = arith.constant 1.562500e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x192xi8>
%1 = tensor.empty() : tensor<1x1x192x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x192xi8>) outs(%1 : tensor<1x1x192x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x192x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_65(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_65(%input0: tensor<32x1x1x192xi8>) -> (%output0: tensor<1x1x192x32xf32>)"}} {
%cst = arith.constant 3.125000e-02 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x1x1x192xi8>
%1 = tensor.empty() : tensor<1x1x192x32xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<32x1x1x192xi8>) outs(%1 : tensor<1x1x192x32xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x192x32xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x192x32xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_68(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_68(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_71(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_71(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_70(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_70(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_66(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_66(%input0: tensor<192x1x1x32xi8>) -> (%output0: tensor<1x1x32x192xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<192x1x1x32xi8>
%1 = tensor.empty() : tensor<1x1x32x192xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<192x1x1x32xi8>) outs(%1 : tensor<1x1x32x192xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x32x192xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x32x192xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_78(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_78(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_77(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_77(%input0: tensor<96x1x1x576xi8>) -> (%output0: tensor<1x1x576x96xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x576xi8>
%1 = tensor.empty() : tensor<1x1x576x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x576xi8>) outs(%1 : tensor<1x1x576x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x576x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_80(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_80(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_79(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_79(%input0: tensor<96x1x1x576xi8>) -> (%output0: tensor<1x1x576x96xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x576xi8>
%1 = tensor.empty() : tensor<1x1x576x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x576xi8>) outs(%1 : tensor<1x1x576x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x576x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_69(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_69(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_81(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_81(%input0: tensor<160x1x1x576xi8>) -> (%output0: tensor<1x1x576x160xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x576xi8>
%1 = tensor.empty() : tensor<1x1x576x160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x576xi8>) outs(%1 : tensor<1x1x576x160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x576x160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x576x160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_76(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_76(%input0: tensor<576x1x1x96xi8>) -> (%output0: tensor<1x1x96x576xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<576x1x1x96xi8>
%1 = tensor.empty() : tensor<1x1x96x576xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<576x1x1x96xi8>) outs(%1 : tensor<1x1x96x576xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x96x576xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x96x576xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_72(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_72(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_75(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_75(%input0: tensor<96x1x1x384xi8>) -> (%output0: tensor<1x1x384x96xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<96x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x96xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<96x1x1x384xi8>) outs(%1 : tensor<1x1x384x96xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x96xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x96xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_74(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_74(%input0: tensor<384x1x1x64xi8>) -> (%output0: tensor<1x1x64x384xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<384x1x1x64xi8>
%1 = tensor.empty() : tensor<1x1x64x384xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<384x1x1x64xi8>) outs(%1 : tensor<1x1x64x384xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x64x384xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x64x384xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_83(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_83(%input0: tensor<160x1x1x960xi8>) -> (%output0: tensor<1x1x960x160xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x960xi8>
%1 = tensor.empty() : tensor<1x1x960x160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x960xi8>) outs(%1 : tensor<1x1x960x160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x960x160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_73(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_73(%input0: tensor<64x1x1x384xi8>) -> (%output0: tensor<1x1x384x64xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<64x1x1x384xi8>
%1 = tensor.empty() : tensor<1x1x384x64xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<64x1x1x384xi8>) outs(%1 : tensor<1x1x384x64xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x384x64xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x384x64xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_84(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_84(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8>
%1 = tensor.empty() : tensor<1x1x160x960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x160x960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_82(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_82(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8>
%1 = tensor.empty() : tensor<1x1x160x960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x160x960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_85(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_85(%input0: tensor<160x1x1x960xi8>) -> (%output0: tensor<1x1x960x160xf32>)"}} {
%cst = arith.constant 7.812500e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<160x1x1x960xi8>
%1 = tensor.empty() : tensor<1x1x960x160xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<160x1x1x960xi8>) outs(%1 : tensor<1x1x960x160xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x960x160xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x160xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_86(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_86(%input0: tensor<960x1x1x160xi8>) -> (%output0: tensor<1x1x160x960xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<960x1x1x160xi8>
%1 = tensor.empty() : tensor<1x1x160x960xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<960x1x1x160xi8>) outs(%1 : tensor<1x1x160x960xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x160x960xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x160x960xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_87(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_87(%input0: tensor<320x1x1x960xi8>) -> (%output0: tensor<1x1x960x320xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<320x1x1x960xi8>
%1 = tensor.empty() : tensor<1x1x960x320xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<320x1x1x960xi8>) outs(%1 : tensor<1x1x960x320xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x960x320xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x960x320xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_89(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_89(%input0: tensor<256x1x1x320xi8>) -> (%output0: tensor<1x1x320x256xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x320xi8>
%1 = tensor.empty() : tensor<1x1x320x256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x320xi8>) outs(%1 : tensor<1x1x320x256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x320x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x320x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_88(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_88(%input0: tensor<256x1x1x320xi8>) -> (%output0: tensor<1x1x320x256xf32>)"}} {
%cst = arith.constant 9.765625E-4 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x320xi8>
%1 = tensor.empty() : tensor<1x1x320x256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x320xi8>) outs(%1 : tensor<1x1x320x256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x320x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x320x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_91(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_91(%input0: tensor<21x1x1x256xi8>) -> (%output0: tensor<1x1x256x21xf32>)"}} {
%cst = arith.constant 3.906250e-03 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<21x1x1x256xi8>
%1 = tensor.empty() : tensor<1x1x256x21xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<21x1x1x256xi8>) outs(%1 : tensor<1x1x256x21xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x256x21xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x256x21xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @jit_eval_90(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @jit_eval_90(%input0: tensor<256x1x1x512xi8>) -> (%output0: tensor<1x1x512x256xf32>)"}} {
%cst = arith.constant 0.001953125 : f32
%0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<256x1x1x512xi8>
%1 = tensor.empty() : tensor<1x1x512x256xf32>
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<256x1x1x512xi8>) outs(%1 : tensor<1x1x512x256xf32>) {
^bb0(%in: i8, %out: f32):
%4 = arith.extsi %in : i8 to i32
%5 = arith.sitofp %4 : i32 to f32
%6 = arith.mulf %5, %cst : f32
linalg.yield %6 : f32
} -> tensor<1x1x512x256xf32>
%3 = hal.tensor.export %2 "output0" : tensor<1x1x512x256xf32> -> !hal.buffer_view
util.return %3 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @tf2onnx(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c-1_i32 = arith.constant -1 : i32
%c0 = arith.constant 0 : index
%device_0 = hal.devices.get %c0 : !hal.device
%0 = util.null : !hal.fence
%fence = hal.fence.create device(%device_0 : !hal.device) flags("None") : !hal.fence
%1 = util.call @tf2onnx$async(%arg0, %0, %fence) : (!hal.buffer_view, !hal.fence, !hal.fence) -> !hal.buffer_view
%status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32
util.return %1 : !hal.buffer_view
}
// -----// IR Dump After FormDispatchRegionsPass (iree-flow-form-dispatch-regions) //----- //
util.func public @tf2onnx$async(%arg0: !hal.buffer_view, %arg1: !hal.fence, %arg2: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
%cst = arith.constant dense_resource<__elided__> : tensor<1x21xf32>
%cst_0 = arith.constant dense_resource<__elided__> : tensor<1x256x1x1xf32>
%cst_1 = arith.constant 0.999259948 : f32
%cst_2 = arith.constant 6.000000e+00 : f32
%cst_3 = arith.constant 6.400000e+01 : f32
%cst_4 = arith.constant -9.53674316E-7 : f32
%cst_5 = arith.constant 7.89230776 : f32
%cst_6 = arith.constant 6.500000e+01 : f32
%cst_7 = arith.constant 7.812500e-03 : f32
%cst_8 = arith.constant 1.250000e-01 : f32
%cst_9 = arith.constant 2.500000e-01 : f32
%cst_10 = arith.constant 6.250000e-02 : f32
%cst_11 = arith.constant 3.125000e-02 : f32
%cst_12 = arith.constant 5.000000e-01 : f32
%cst_13 = arith.constant 1.000000e+00 : f32
%cst_14 = arith.constant 0.000000e+00 : f32
%cst_15 = arith.constant 1.270000e+02 : f32
%cst_16 = arith.constant -1.280000e+02 : f32
%cst_17 = arith.constant 4.225000e+03 : f32
%c0 = arith.constant 0 : index
%cst_18 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_19 = arith.constant dense_resource<__elided__> : tensor<576xf32>
%cst_20 = arith.constant dense_resource<__elided__> : tensor<960xf32>
%cst_21 = arith.constant dense_resource<__elided__> : tensor<144xf32>
%cst_22 = arith.constant dense_resource<__elided__> : tensor<192xf32>
%cst_23 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_24 = arith.constant dense_resource<__elided__> : tensor<256xf32>
%cst_25 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_26 = arith.constant dense_resource<__elided__> : tensor<3x3x3x32xf32>
%cst_27 = arith.constant dense_resource<__elided__> : tensor<32x3x3xf32>
%cst_28 = arith.constant dense_resource<__elided__> : tensor<16xf32>
%cst_29 = arith.constant dense_resource<__elided__> : tensor<96x3x3xf32>
%cst_30 = arith.constant dense_resource<__elided__> : tensor<24xf32>
%cst_31 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_32 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_33 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_34 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32>
%cst_35 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_36 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32>
%cst_37 = arith.constant dense_resource<__elided__> : tensor<576xf32>
%cst_38 = arith.constant dense_resource<__elided__> : tensor<96xf32>
%cst_39 = arith.constant dense_resource<__elided__> : tensor<576x3x3xf32>
%cst_40 = arith.constant dense_resource<__elided__> : tensor<576xf32>
%cst_41 = arith.constant dense_resource<__elided__> : tensor<160xf32>
%cst_42 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32>
%cst_43 = arith.constant dense_resource<__elided__> : tensor<160xf32>
%cst_44 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32>
%cst_45 = arith.constant dense_resource<__elided__> : tensor<960xf32>
%cst_46 = arith.constant dense_resource<__elided__> : tensor<160xf32>
%cst_47 = arith.constant dense_resource<__elided__> : tensor<960x3x3xf32>
%cst_48 = arith.constant dense_resource<__elided__> : tensor<960xf32>
%cst_49 = arith.constant dense_resource<__elided__> : tensor<320xf32>
%cst_50 = arith.constant dense_resource<__elided__> : tensor<144x3x3xf32>
%cst_51 = arith.constant dense_resource<__elided__> : tensor<24xf32>
%cst_52 = arith.constant dense_resource<__elided__> : tensor<144x3x3xf32>
%cst_53 = arith.constant dense_resource<__elided__> : tensor<144xf32>
%cst_54 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_55 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32>
%cst_56 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_57 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32>
%cst_58 = arith.constant dense_resource<__elided__> : tensor<192xf32>
%cst_59 = arith.constant dense_resource<__elided__> : tensor<32xf32>
%cst_60 = arith.constant dense_resource<__elided__> : tensor<192x3x3xf32>
%cst_61 = arith.constant dense_resource<__elided__> : tensor<192xf32>
%cst_62 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_63 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_64 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_65 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_66 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_67 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_68 = arith.constant dense_resource<__elided__> : tensor<384x3x3xf32>
%cst_69 = arith.constant dense_resource<__elided__> : tensor<384xf32>
%cst_70 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_71 = arith.constant dense_resource<__elided__> : tensor<256xf32>
%cst_72 = arith.constant dense_resource<__elided__> : tensor<1x1x32x16xf32>
%cst_73 = arith.constant dense_resource<__elided__> : tensor<1x1x16x96xf32>
%cst_74 = arith.constant dense_resource<__elided__> : tensor<1x1x96x24xf32>
%cst_75 = arith.constant dense_resource<__elided__> : tensor<1x1x24x144xf32>
%cst_76 = arith.constant dense_resource<__elided__> : tensor<1x1x144x24xf32>
%cst_77 = arith.constant dense_resource<__elided__> : tensor<1x1x24x144xf32>
%cst_78 = arith.constant dense_resource<__elided__> : tensor<1x1x144x32xf32>
%cst_79 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32>
%cst_80 = arith.constant dense_resource<__elided__> : tensor<1x1x192x32xf32>
%cst_81 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32>
%cst_82 = arith.constant dense_resource<__elided__> : tensor<1x1x192x32xf32>
%cst_83 = arith.constant dense_resource<__elided__> : tensor<1x1x32x192xf32>
%cst_84 = arith.constant dense_resource<__elided__> : tensor<1x1x192x64xf32>
%cst_85 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_86 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32>
%cst_87 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_88 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32>
%cst_89 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_90 = arith.constant dense_resource<__elided__> : tensor<1x1x384x64xf32>
%cst_91 = arith.constant dense_resource<__elided__> : tensor<1x1x64x384xf32>
%cst_92 = arith.constant dense_resource<__elided__> : tensor<1x1x384x96xf32>
%cst_93 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32>
%cst_94 = arith.constant dense_resource<__elided__> : tensor<1x1x576x96xf32>
%cst_95 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32>
%cst_96 = arith.constant dense_resource<__elided__> : tensor<1x1x576x96xf32>
%cst_97 = arith.constant dense_resource<__elided__> : tensor<1x1x96x576xf32>
%cst_98 = arith.constant dense_resource<__elided__> : tensor<1x1x576x160xf32>
%cst_99 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32>
%cst_100 = arith.constant dense_resource<__elided__> : tensor<1x1x960x160xf32>
%cst_101 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32>
%cst_102 = arith.constant dense_resource<__elided__> : tensor<1x1x960x160xf32>
%cst_103 = arith.constant dense_resource<__elided__> : tensor<1x1x160x960xf32>
%cst_104 = arith.constant dense_resource<__elided__> : tensor<1x1x960x320xf32>
%cst_105 = arith.constant dense_resource<__elided__> : tensor<1x1x320x256xf32>
%cst_106 = arith.constant dense_resource<__elided__> : tensor<1x1x320x256xf32>
%cst_107 = arith.constant dense_resource<__elided__> : tensor<1x1x512x256xf32>
%cst_108 = arith.constant dense_resource<__elided__> : tensor<1x1x256x21xf32>
%0 = hal.tensor.import wait(%arg1) => %arg0 : !hal.buffer_view -> tensor<1x513x513x3xf32>
%expanded = tensor.expand_shape %0 [[0], [1], [2], [3, 4]] output_shape [1, 513, 513, 1, 3] : tensor<1x513x513x3xf32> into tensor<1x513x513x1x3xf32>
%1 = tensor.empty() : tensor<1x513x513x1x3xf32>
%2 = flow.dispatch.region -> (tensor<1x513x513x1x3xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expanded : tensor<1x513x513x1x3xf32>) outs(%1 : tensor<1x513x513x1x3xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_7 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_7 : f32
linalg.yield %248 : f32
} -> tensor<1x513x513x1x3xf32>
flow.return %237 : tensor<1x513x513x1x3xf32>
}
%collapsed = tensor.collapse_shape %2 [[0, 1], [2, 3], [4]] : tensor<1x513x513x1x3xf32> into tensor<513x513x3xf32>
%3 = tensor.empty() : tensor<515x515x3xf32>
%4 = linalg.fill ins(%cst_14 : f32) outs(%3 : tensor<515x515x3xf32>) -> tensor<515x515x3xf32>
%inserted_slice = tensor.insert_slice %collapsed into %4[1, 1, 0] [513, 513, 3] [1, 1, 1] : tensor<513x513x3xf32> into tensor<515x515x3xf32>
%expanded_109 = tensor.expand_shape %inserted_slice [[0, 1], [2], [3]] output_shape [1, 515, 515, 3] : tensor<515x515x3xf32> into tensor<1x515x515x3xf32>
%5 = tensor.empty() : tensor<1x257x257x32xf32>
%6 = linalg.fill ins(%cst_14 : f32) outs(%5 : tensor<1x257x257x32xf32>) -> tensor<1x257x257x32xf32>
%7 = flow.dispatch.region -> (tensor<1x257x257x32xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%expanded_109, %cst_26 : tensor<1x515x515x3xf32>, tensor<3x3x3x32xf32>) outs(%6 : tensor<1x257x257x32xf32>) -> tensor<1x257x257x32xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_25 : tensor<1x257x257x32xf32>, tensor<32xf32>) outs(%5 : tensor<1x257x257x32xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x257x257x32xf32>
flow.return %238 : tensor<1x257x257x32xf32>
}
%collapsed_110 = tensor.collapse_shape %7 [[0, 1], [2], [3]] : tensor<1x257x257x32xf32> into tensor<257x257x32xf32>
%8 = tensor.empty() : tensor<259x259x32xf32>
%9 = linalg.fill ins(%cst_14 : f32) outs(%8 : tensor<259x259x32xf32>) -> tensor<259x259x32xf32>
%inserted_slice_111 = tensor.insert_slice %collapsed_110 into %9[1, 1, 0] [257, 257, 32] [1, 1, 1] : tensor<257x257x32xf32> into tensor<259x259x32xf32>
%expanded_112 = tensor.expand_shape %inserted_slice_111 [[0], [1], [2, 3]] output_shape [259, 259, 1, 32] : tensor<259x259x32xf32> into tensor<259x259x1x32xf32>
%10 = tensor.empty() : tensor<1x32x259x259xf32>
%11 = flow.dispatch.region -> (tensor<1x32x259x259xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_112 : tensor<259x259x1x32xf32>) outs(%10 : tensor<1x32x259x259xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x32x259x259xf32>
flow.return %237 : tensor<1x32x259x259xf32>
}
%12 = tensor.empty() : tensor<1x32x257x257xf32>
%13 = linalg.fill ins(%cst_14 : f32) outs(%12 : tensor<1x32x257x257xf32>) -> tensor<1x32x257x257xf32>
%14 = flow.dispatch.region -> (tensor<1x32x257x257xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%11, %cst_27 : tensor<1x32x259x259xf32>, tensor<32x3x3xf32>) outs(%13 : tensor<1x32x257x257xf32>) -> tensor<1x32x257x257xf32>
flow.return %237 : tensor<1x32x257x257xf32>
}
%15 = tensor.empty() : tensor<257x257x1x32xf32>
%16 = flow.dispatch.region -> (tensor<257x257x1x32xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<1x32x257x257xf32>) outs(%15 : tensor<257x257x1x32xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<257x257x1x32xf32>
flow.return %237 : tensor<257x257x1x32xf32>
}
%collapsed_113 = tensor.collapse_shape %16 [[0], [1], [2, 3]] : tensor<257x257x1x32xf32> into tensor<257x257x32xf32>
%expanded_114 = tensor.expand_shape %collapsed_113 [[0, 1], [2], [3]] output_shape [1, 257, 257, 32] : tensor<257x257x32xf32> into tensor<1x257x257x32xf32>
%17 = flow.dispatch.region -> (tensor<1x257x257x32xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_114 : tensor<1x257x257x32xf32>) outs(%5 : tensor<1x257x257x32xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x257x257x32xf32>
flow.return %237 : tensor<1x257x257x32xf32>
}
%18 = tensor.empty() : tensor<1x257x257x16xf32>
%19 = linalg.fill ins(%cst_14 : f32) outs(%18 : tensor<1x257x257x16xf32>) -> tensor<1x257x257x16xf32>
%20 = flow.dispatch.region -> (tensor<1x257x257x16xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%17, %cst_72 : tensor<1x257x257x32xf32>, tensor<1x1x32x16xf32>) outs(%19 : tensor<1x257x257x16xf32>) -> tensor<1x257x257x16xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_28 : tensor<1x257x257x16xf32>, tensor<16xf32>) outs(%18 : tensor<1x257x257x16xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_9 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_9 : f32
linalg.yield %250 : f32
} -> tensor<1x257x257x16xf32>
flow.return %238 : tensor<1x257x257x16xf32>
}
%21 = tensor.empty() : tensor<1x257x257x96xf32>
%22 = linalg.fill ins(%cst_14 : f32) outs(%21 : tensor<1x257x257x96xf32>) -> tensor<1x257x257x96xf32>
%23 = flow.dispatch.region -> (tensor<1x257x257x96xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%20, %cst_73 : tensor<1x257x257x16xf32>, tensor<1x1x16x96xf32>) outs(%22 : tensor<1x257x257x96xf32>) -> tensor<1x257x257x96xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_18 : tensor<1x257x257x96xf32>, tensor<96xf32>) outs(%21 : tensor<1x257x257x96xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x257x257x96xf32>
flow.return %238 : tensor<1x257x257x96xf32>
}
%collapsed_115 = tensor.collapse_shape %23 [[0, 1], [2], [3]] : tensor<1x257x257x96xf32> into tensor<257x257x96xf32>
%24 = tensor.empty() : tensor<259x259x96xf32>
%25 = linalg.fill ins(%cst_14 : f32) outs(%24 : tensor<259x259x96xf32>) -> tensor<259x259x96xf32>
%inserted_slice_116 = tensor.insert_slice %collapsed_115 into %25[1, 1, 0] [257, 257, 96] [1, 1, 1] : tensor<257x257x96xf32> into tensor<259x259x96xf32>
%expanded_117 = tensor.expand_shape %inserted_slice_116 [[0], [1], [2, 3]] output_shape [259, 259, 1, 96] : tensor<259x259x96xf32> into tensor<259x259x1x96xf32>
%26 = tensor.empty() : tensor<1x96x259x259xf32>
%27 = flow.dispatch.region -> (tensor<1x96x259x259xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_117 : tensor<259x259x1x96xf32>) outs(%26 : tensor<1x96x259x259xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x96x259x259xf32>
flow.return %237 : tensor<1x96x259x259xf32>
}
%28 = tensor.empty() : tensor<1x96x129x129xf32>
%29 = linalg.fill ins(%cst_14 : f32) outs(%28 : tensor<1x96x129x129xf32>) -> tensor<1x96x129x129xf32>
%30 = flow.dispatch.region -> (tensor<1x96x129x129xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%27, %cst_29 : tensor<1x96x259x259xf32>, tensor<96x3x3xf32>) outs(%29 : tensor<1x96x129x129xf32>) -> tensor<1x96x129x129xf32>
flow.return %237 : tensor<1x96x129x129xf32>
}
%31 = tensor.empty() : tensor<129x129x1x96xf32>
%32 = flow.dispatch.region -> (tensor<129x129x1x96xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%30 : tensor<1x96x129x129xf32>) outs(%31 : tensor<129x129x1x96xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<129x129x1x96xf32>
flow.return %237 : tensor<129x129x1x96xf32>
}
%collapsed_118 = tensor.collapse_shape %32 [[0], [1], [2, 3]] : tensor<129x129x1x96xf32> into tensor<129x129x96xf32>
%expanded_119 = tensor.expand_shape %collapsed_118 [[0, 1], [2], [3]] output_shape [1, 129, 129, 96] : tensor<129x129x96xf32> into tensor<1x129x129x96xf32>
%33 = tensor.empty() : tensor<1x129x129x96xf32>
%34 = flow.dispatch.region -> (tensor<1x129x129x96xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_119 : tensor<1x129x129x96xf32>) outs(%33 : tensor<1x129x129x96xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x129x129x96xf32>
flow.return %237 : tensor<1x129x129x96xf32>
}
%35 = tensor.empty() : tensor<129x129x24xf32>
%36 = tensor.empty() : tensor<1x129x129x24xf32>
%37 = linalg.fill ins(%cst_14 : f32) outs(%36 : tensor<1x129x129x24xf32>) -> tensor<1x129x129x24xf32>
%38 = tensor.empty() : tensor<1x129x129x24xi8>
%39 = flow.dispatch.region -> (tensor<1x129x129x24xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%34, %cst_74 : tensor<1x129x129x96xf32>, tensor<1x1x96x24xf32>) outs(%37 : tensor<1x129x129x24xf32>) -> tensor<1x129x129x24xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_30 : tensor<1x129x129x24xf32>, tensor<24xf32>) outs(%38 : tensor<1x129x129x24xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_9 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
linalg.yield %247 : i8
} -> tensor<1x129x129x24xi8>
flow.return %238 : tensor<1x129x129x24xi8>
}
%collapsed_120 = tensor.collapse_shape %39 [[0, 1], [2], [3]] : tensor<1x129x129x24xi8> into tensor<129x129x24xi8>
%40 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_120 : tensor<129x129x24xi8>) outs(%35 : tensor<129x129x24xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_9 : f32
linalg.yield %239 : f32
} -> tensor<129x129x24xf32>
%expanded_121 = tensor.expand_shape %40 [[0, 1], [2], [3]] output_shape [1, 129, 129, 24] : tensor<129x129x24xf32> into tensor<1x129x129x24xf32>
%41 = tensor.empty() : tensor<1x129x129x144xf32>
%42 = linalg.fill ins(%cst_14 : f32) outs(%41 : tensor<1x129x129x144xf32>) -> tensor<1x129x129x144xf32>
%43 = flow.dispatch.region -> (tensor<1x129x129x144xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_121, %cst_75 : tensor<1x129x129x24xf32>, tensor<1x1x24x144xf32>) outs(%42 : tensor<1x129x129x144xf32>) -> tensor<1x129x129x144xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_21 : tensor<1x129x129x144xf32>, tensor<144xf32>) outs(%41 : tensor<1x129x129x144xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x129x129x144xf32>
flow.return %238 : tensor<1x129x129x144xf32>
}
%collapsed_122 = tensor.collapse_shape %43 [[0, 1], [2], [3]] : tensor<1x129x129x144xf32> into tensor<129x129x144xf32>
%44 = tensor.empty() : tensor<131x131x144xf32>
%45 = linalg.fill ins(%cst_14 : f32) outs(%44 : tensor<131x131x144xf32>) -> tensor<131x131x144xf32>
%inserted_slice_123 = tensor.insert_slice %collapsed_122 into %45[1, 1, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<131x131x144xf32>
%expanded_124 = tensor.expand_shape %inserted_slice_123 [[0], [1], [2, 3]] output_shape [131, 131, 1, 144] : tensor<131x131x144xf32> into tensor<131x131x1x144xf32>
%46 = tensor.empty() : tensor<1x144x131x131xf32>
%47 = flow.dispatch.region -> (tensor<1x144x131x131xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_124 : tensor<131x131x1x144xf32>) outs(%46 : tensor<1x144x131x131xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x144x131x131xf32>
flow.return %237 : tensor<1x144x131x131xf32>
}
%48 = tensor.empty() : tensor<1x144x129x129xf32>
%49 = linalg.fill ins(%cst_14 : f32) outs(%48 : tensor<1x144x129x129xf32>) -> tensor<1x144x129x129xf32>
%50 = flow.dispatch.region -> (tensor<1x144x129x129xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%47, %cst_50 : tensor<1x144x131x131xf32>, tensor<144x3x3xf32>) outs(%49 : tensor<1x144x129x129xf32>) -> tensor<1x144x129x129xf32>
flow.return %237 : tensor<1x144x129x129xf32>
}
%51 = tensor.empty() : tensor<129x129x1x144xf32>
%52 = flow.dispatch.region -> (tensor<129x129x1x144xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50 : tensor<1x144x129x129xf32>) outs(%51 : tensor<129x129x1x144xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<129x129x1x144xf32>
flow.return %237 : tensor<129x129x1x144xf32>
}
%collapsed_125 = tensor.collapse_shape %52 [[0], [1], [2, 3]] : tensor<129x129x1x144xf32> into tensor<129x129x144xf32>
%expanded_126 = tensor.expand_shape %collapsed_125 [[0, 1], [2], [3]] output_shape [1, 129, 129, 144] : tensor<129x129x144xf32> into tensor<1x129x129x144xf32>
%53 = flow.dispatch.region -> (tensor<1x129x129x144xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_126 : tensor<1x129x129x144xf32>) outs(%41 : tensor<1x129x129x144xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x129x129x144xf32>
flow.return %237 : tensor<1x129x129x144xf32>
}
%54 = flow.dispatch.region -> (tensor<1x129x129x24xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%53, %cst_76 : tensor<1x129x129x144xf32>, tensor<1x1x144x24xf32>) outs(%37 : tensor<1x129x129x24xf32>) -> tensor<1x129x129x24xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_121, %237, %cst_51 : tensor<1x129x129x24xf32>, tensor<1x129x129x24xf32>, tensor<24xf32>) outs(%36 : tensor<1x129x129x24xf32>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_9 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_9 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_12 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
%260 = arith.extsi %259 : i8 to i32
%261 = arith.sitofp %260 : i32 to f32
%262 = arith.mulf %261, %cst_12 : f32
linalg.yield %262 : f32
} -> tensor<1x129x129x24xf32>
flow.return %238 : tensor<1x129x129x24xf32>
}
%55 = flow.dispatch.region -> (tensor<1x129x129x144xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%54, %cst_77 : tensor<1x129x129x24xf32>, tensor<1x1x24x144xf32>) outs(%42 : tensor<1x129x129x144xf32>) -> tensor<1x129x129x144xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_53 : tensor<1x129x129x144xf32>, tensor<144xf32>) outs(%41 : tensor<1x129x129x144xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x129x129x144xf32>
flow.return %238 : tensor<1x129x129x144xf32>
}
%collapsed_127 = tensor.collapse_shape %55 [[0, 1], [2], [3]] : tensor<1x129x129x144xf32> into tensor<129x129x144xf32>
%inserted_slice_128 = tensor.insert_slice %collapsed_127 into %45[1, 1, 0] [129, 129, 144] [1, 1, 1] : tensor<129x129x144xf32> into tensor<131x131x144xf32>
%expanded_129 = tensor.expand_shape %inserted_slice_128 [[0], [1], [2, 3]] output_shape [131, 131, 1, 144] : tensor<131x131x144xf32> into tensor<131x131x1x144xf32>
%56 = flow.dispatch.region -> (tensor<1x144x131x131xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_129 : tensor<131x131x1x144xf32>) outs(%46 : tensor<1x144x131x131xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x144x131x131xf32>
flow.return %237 : tensor<1x144x131x131xf32>
}
%57 = tensor.empty() : tensor<1x144x65x65xf32>
%58 = linalg.fill ins(%cst_14 : f32) outs(%57 : tensor<1x144x65x65xf32>) -> tensor<1x144x65x65xf32>
%59 = flow.dispatch.region -> (tensor<1x144x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%56, %cst_52 : tensor<1x144x131x131xf32>, tensor<144x3x3xf32>) outs(%58 : tensor<1x144x65x65xf32>) -> tensor<1x144x65x65xf32>
flow.return %237 : tensor<1x144x65x65xf32>
}
%60 = tensor.empty() : tensor<65x65x1x144xf32>
%61 = flow.dispatch.region -> (tensor<65x65x1x144xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%59 : tensor<1x144x65x65xf32>) outs(%60 : tensor<65x65x1x144xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x144xf32>
flow.return %237 : tensor<65x65x1x144xf32>
}
%collapsed_130 = tensor.collapse_shape %61 [[0], [1], [2, 3]] : tensor<65x65x1x144xf32> into tensor<65x65x144xf32>
%expanded_131 = tensor.expand_shape %collapsed_130 [[0, 1], [2], [3]] output_shape [1, 65, 65, 144] : tensor<65x65x144xf32> into tensor<1x65x65x144xf32>
%62 = tensor.empty() : tensor<1x65x65x144xf32>
%63 = flow.dispatch.region -> (tensor<1x65x65x144xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_131 : tensor<1x65x65x144xf32>) outs(%62 : tensor<1x65x65x144xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x144xf32>
flow.return %237 : tensor<1x65x65x144xf32>
}
%64 = tensor.empty() : tensor<65x65x32xf32>
%65 = tensor.empty() : tensor<1x65x65x32xf32>
%66 = linalg.fill ins(%cst_14 : f32) outs(%65 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32>
%67 = tensor.empty() : tensor<1x65x65x32xi8>
%68 = flow.dispatch.region -> (tensor<1x65x65x32xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%63, %cst_78 : tensor<1x65x65x144xf32>, tensor<1x1x144x32xf32>) outs(%66 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_54 : tensor<1x65x65x32xf32>, tensor<32xf32>) outs(%67 : tensor<1x65x65x32xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_9 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
linalg.yield %247 : i8
} -> tensor<1x65x65x32xi8>
flow.return %238 : tensor<1x65x65x32xi8>
}
%collapsed_132 = tensor.collapse_shape %68 [[0, 1], [2], [3]] : tensor<1x65x65x32xi8> into tensor<65x65x32xi8>
%69 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_132 : tensor<65x65x32xi8>) outs(%64 : tensor<65x65x32xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_9 : f32
linalg.yield %239 : f32
} -> tensor<65x65x32xf32>
%expanded_133 = tensor.expand_shape %69 [[0, 1], [2], [3]] output_shape [1, 65, 65, 32] : tensor<65x65x32xf32> into tensor<1x65x65x32xf32>
%70 = tensor.empty() : tensor<1x65x65x192xf32>
%71 = linalg.fill ins(%cst_14 : f32) outs(%70 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32>
%72 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_133, %cst_79 : tensor<1x65x65x32xf32>, tensor<1x1x32x192xf32>) outs(%71 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_22 : tensor<1x65x65x192xf32>, tensor<192xf32>) outs(%70 : tensor<1x65x65x192xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x192xf32>
flow.return %238 : tensor<1x65x65x192xf32>
}
%collapsed_134 = tensor.collapse_shape %72 [[0, 1], [2], [3]] : tensor<1x65x65x192xf32> into tensor<65x65x192xf32>
%73 = tensor.empty() : tensor<67x67x192xf32>
%74 = linalg.fill ins(%cst_14 : f32) outs(%73 : tensor<67x67x192xf32>) -> tensor<67x67x192xf32>
%inserted_slice_135 = tensor.insert_slice %collapsed_134 into %74[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32>
%expanded_136 = tensor.expand_shape %inserted_slice_135 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32>
%75 = tensor.empty() : tensor<1x192x67x67xf32>
%76 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_136 : tensor<67x67x1x192xf32>) outs(%75 : tensor<1x192x67x67xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x192x67x67xf32>
flow.return %237 : tensor<1x192x67x67xf32>
}
%77 = tensor.empty() : tensor<1x192x65x65xf32>
%78 = linalg.fill ins(%cst_14 : f32) outs(%77 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
%79 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%76, %cst_55 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%78 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
flow.return %237 : tensor<1x192x65x65xf32>
}
%80 = tensor.empty() : tensor<65x65x1x192xf32>
%81 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%79 : tensor<1x192x65x65xf32>) outs(%80 : tensor<65x65x1x192xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x192xf32>
flow.return %237 : tensor<65x65x1x192xf32>
}
%collapsed_137 = tensor.collapse_shape %81 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32>
%expanded_138 = tensor.expand_shape %collapsed_137 [[0, 1], [2], [3]] output_shape [1, 65, 65, 192] : tensor<65x65x192xf32> into tensor<1x65x65x192xf32>
%82 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_138 : tensor<1x65x65x192xf32>) outs(%70 : tensor<1x65x65x192xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x192xf32>
flow.return %237 : tensor<1x65x65x192xf32>
}
%83 = flow.dispatch.region -> (tensor<1x65x65x32xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%82, %cst_80 : tensor<1x65x65x192xf32>, tensor<1x1x192x32xf32>) outs(%66 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_133, %237, %cst_56 : tensor<1x65x65x32xf32>, tensor<1x65x65x32xf32>, tensor<32xf32>) outs(%67 : tensor<1x65x65x32xi8>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_9 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_9 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_9 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
linalg.yield %259 : i8
} -> tensor<1x65x65x32xi8>
flow.return %238 : tensor<1x65x65x32xi8>
}
%collapsed_139 = tensor.collapse_shape %83 [[0, 1], [2], [3]] : tensor<1x65x65x32xi8> into tensor<65x65x32xi8>
%84 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_139 : tensor<65x65x32xi8>) outs(%64 : tensor<65x65x32xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_9 : f32
linalg.yield %239 : f32
} -> tensor<65x65x32xf32>
%expanded_140 = tensor.expand_shape %84 [[0, 1], [2], [3]] output_shape [1, 65, 65, 32] : tensor<65x65x32xf32> into tensor<1x65x65x32xf32>
%85 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_140, %cst_81 : tensor<1x65x65x32xf32>, tensor<1x1x32x192xf32>) outs(%71 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_58 : tensor<1x65x65x192xf32>, tensor<192xf32>) outs(%70 : tensor<1x65x65x192xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x192xf32>
flow.return %238 : tensor<1x65x65x192xf32>
}
%collapsed_141 = tensor.collapse_shape %85 [[0, 1], [2], [3]] : tensor<1x65x65x192xf32> into tensor<65x65x192xf32>
%inserted_slice_142 = tensor.insert_slice %collapsed_141 into %74[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32>
%expanded_143 = tensor.expand_shape %inserted_slice_142 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32>
%86 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_143 : tensor<67x67x1x192xf32>) outs(%75 : tensor<1x192x67x67xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x192x67x67xf32>
flow.return %237 : tensor<1x192x67x67xf32>
}
%87 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%86, %cst_57 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%78 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
flow.return %237 : tensor<1x192x65x65xf32>
}
%88 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%87 : tensor<1x192x65x65xf32>) outs(%80 : tensor<65x65x1x192xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x192xf32>
flow.return %237 : tensor<65x65x1x192xf32>
}
%collapsed_144 = tensor.collapse_shape %88 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32>
%expanded_145 = tensor.expand_shape %collapsed_144 [[0, 1], [2], [3]] output_shape [1, 65, 65, 192] : tensor<65x65x192xf32> into tensor<1x65x65x192xf32>
%89 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_145 : tensor<1x65x65x192xf32>) outs(%70 : tensor<1x65x65x192xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x192xf32>
flow.return %237 : tensor<1x65x65x192xf32>
}
%90 = flow.dispatch.region -> (tensor<1x65x65x32xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%89, %cst_82 : tensor<1x65x65x192xf32>, tensor<1x1x192x32xf32>) outs(%66 : tensor<1x65x65x32xf32>) -> tensor<1x65x65x32xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_140, %237, %cst_59 : tensor<1x65x65x32xf32>, tensor<1x65x65x32xf32>, tensor<32xf32>) outs(%65 : tensor<1x65x65x32xf32>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_9 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_9 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_9 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
%260 = arith.extsi %259 : i8 to i32
%261 = arith.sitofp %260 : i32 to f32
%262 = arith.mulf %261, %cst_9 : f32
linalg.yield %262 : f32
} -> tensor<1x65x65x32xf32>
flow.return %238 : tensor<1x65x65x32xf32>
}
%91 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%90, %cst_83 : tensor<1x65x65x32xf32>, tensor<1x1x32x192xf32>) outs(%71 : tensor<1x65x65x192xf32>) -> tensor<1x65x65x192xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_61 : tensor<1x65x65x192xf32>, tensor<192xf32>) outs(%70 : tensor<1x65x65x192xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x192xf32>
flow.return %238 : tensor<1x65x65x192xf32>
}
%collapsed_146 = tensor.collapse_shape %91 [[0, 1], [2], [3]] : tensor<1x65x65x192xf32> into tensor<65x65x192xf32>
%inserted_slice_147 = tensor.insert_slice %collapsed_146 into %74[1, 1, 0] [65, 65, 192] [1, 1, 1] : tensor<65x65x192xf32> into tensor<67x67x192xf32>
%expanded_148 = tensor.expand_shape %inserted_slice_147 [[0], [1], [2, 3]] output_shape [67, 67, 1, 192] : tensor<67x67x192xf32> into tensor<67x67x1x192xf32>
%92 = flow.dispatch.region -> (tensor<1x192x67x67xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_148 : tensor<67x67x1x192xf32>) outs(%75 : tensor<1x192x67x67xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x192x67x67xf32>
flow.return %237 : tensor<1x192x67x67xf32>
}
%93 = flow.dispatch.region -> (tensor<1x192x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%92, %cst_60 : tensor<1x192x67x67xf32>, tensor<192x3x3xf32>) outs(%78 : tensor<1x192x65x65xf32>) -> tensor<1x192x65x65xf32>
flow.return %237 : tensor<1x192x65x65xf32>
}
%94 = flow.dispatch.region -> (tensor<65x65x1x192xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%93 : tensor<1x192x65x65xf32>) outs(%80 : tensor<65x65x1x192xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x192xf32>
flow.return %237 : tensor<65x65x1x192xf32>
}
%collapsed_149 = tensor.collapse_shape %94 [[0], [1], [2, 3]] : tensor<65x65x1x192xf32> into tensor<65x65x192xf32>
%expanded_150 = tensor.expand_shape %collapsed_149 [[0, 1], [2], [3]] output_shape [1, 65, 65, 192] : tensor<65x65x192xf32> into tensor<1x65x65x192xf32>
%95 = flow.dispatch.region -> (tensor<1x65x65x192xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_150 : tensor<1x65x65x192xf32>) outs(%70 : tensor<1x65x65x192xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x192xf32>
flow.return %237 : tensor<1x65x65x192xf32>
}
%96 = tensor.empty() : tensor<65x65x64xf32>
%97 = tensor.empty() : tensor<1x65x65x64xf32>
%98 = linalg.fill ins(%cst_14 : f32) outs(%97 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32>
%99 = tensor.empty() : tensor<1x65x65x64xi8>
%100 = flow.dispatch.region -> (tensor<1x65x65x64xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%95, %cst_84 : tensor<1x65x65x192xf32>, tensor<1x1x192x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_62 : tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%99 : tensor<1x65x65x64xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
linalg.yield %247 : i8
} -> tensor<1x65x65x64xi8>
flow.return %238 : tensor<1x65x65x64xi8>
}
%collapsed_151 = tensor.collapse_shape %100 [[0, 1], [2], [3]] : tensor<1x65x65x64xi8> into tensor<65x65x64xi8>
%101 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_151 : tensor<65x65x64xi8>) outs(%96 : tensor<65x65x64xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_8 : f32
linalg.yield %239 : f32
} -> tensor<65x65x64xf32>
%expanded_152 = tensor.expand_shape %101 [[0, 1], [2], [3]] output_shape [1, 65, 65, 64] : tensor<65x65x64xf32> into tensor<1x65x65x64xf32>
%102 = tensor.empty() : tensor<1x65x65x384xf32>
%103 = linalg.fill ins(%cst_14 : f32) outs(%102 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32>
%104 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_152, %cst_85 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_23 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x384xf32>
flow.return %238 : tensor<1x65x65x384xf32>
}
%collapsed_153 = tensor.collapse_shape %104 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32>
%105 = tensor.empty() : tensor<69x69x384xf32>
%106 = linalg.fill ins(%cst_14 : f32) outs(%105 : tensor<69x69x384xf32>) -> tensor<69x69x384xf32>
%inserted_slice_154 = tensor.insert_slice %collapsed_153 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
%expanded_155 = tensor.expand_shape %inserted_slice_154 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%107 = tensor.empty() : tensor<1x384x69x69xf32>
%108 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_155 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %237 : tensor<1x384x69x69xf32>
}
%109 = tensor.empty() : tensor<1x384x65x65xf32>
%110 = linalg.fill ins(%cst_14 : f32) outs(%109 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
%111 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%108, %cst_63 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %237 : tensor<1x384x65x65xf32>
}
%112 = tensor.empty() : tensor<65x65x1x384xf32>
%113 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%111 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x384xf32>
flow.return %237 : tensor<65x65x1x384xf32>
}
%collapsed_156 = tensor.collapse_shape %113 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%expanded_157 = tensor.expand_shape %collapsed_156 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32>
%114 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_157 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x384xf32>
flow.return %237 : tensor<1x65x65x384xf32>
}
%115 = flow.dispatch.region -> (tensor<1x65x65x64xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%114, %cst_86 : tensor<1x65x65x384xf32>, tensor<1x1x384x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_152, %237, %cst_64 : tensor<1x65x65x64xf32>, tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%99 : tensor<1x65x65x64xi8>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_9 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
linalg.yield %259 : i8
} -> tensor<1x65x65x64xi8>
flow.return %238 : tensor<1x65x65x64xi8>
}
%collapsed_158 = tensor.collapse_shape %115 [[0, 1], [2], [3]] : tensor<1x65x65x64xi8> into tensor<65x65x64xi8>
%116 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_158 : tensor<65x65x64xi8>) outs(%96 : tensor<65x65x64xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_9 : f32
linalg.yield %239 : f32
} -> tensor<65x65x64xf32>
%expanded_159 = tensor.expand_shape %116 [[0, 1], [2], [3]] output_shape [1, 65, 65, 64] : tensor<65x65x64xf32> into tensor<1x65x65x64xf32>
%117 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_159, %cst_87 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_66 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x384xf32>
flow.return %238 : tensor<1x65x65x384xf32>
}
%collapsed_160 = tensor.collapse_shape %117 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32>
%inserted_slice_161 = tensor.insert_slice %collapsed_160 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
%expanded_162 = tensor.expand_shape %inserted_slice_161 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%118 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_162 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %237 : tensor<1x384x69x69xf32>
}
%119 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%118, %cst_65 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %237 : tensor<1x384x65x65xf32>
}
%120 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%119 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x384xf32>
flow.return %237 : tensor<65x65x1x384xf32>
}
%collapsed_163 = tensor.collapse_shape %120 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%expanded_164 = tensor.expand_shape %collapsed_163 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32>
%121 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_164 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x384xf32>
flow.return %237 : tensor<1x65x65x384xf32>
}
%122 = flow.dispatch.region -> (tensor<1x65x65x64xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%121, %cst_88 : tensor<1x65x65x384xf32>, tensor<1x1x384x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_159, %237, %cst_67 : tensor<1x65x65x64xf32>, tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%99 : tensor<1x65x65x64xi8>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_9 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
linalg.yield %259 : i8
} -> tensor<1x65x65x64xi8>
flow.return %238 : tensor<1x65x65x64xi8>
}
%collapsed_165 = tensor.collapse_shape %122 [[0, 1], [2], [3]] : tensor<1x65x65x64xi8> into tensor<65x65x64xi8>
%123 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_165 : tensor<65x65x64xi8>) outs(%96 : tensor<65x65x64xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_9 : f32
linalg.yield %239 : f32
} -> tensor<65x65x64xf32>
%expanded_166 = tensor.expand_shape %123 [[0, 1], [2], [3]] output_shape [1, 65, 65, 64] : tensor<65x65x64xf32> into tensor<1x65x65x64xf32>
%124 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_166, %cst_89 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_69 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x384xf32>
flow.return %238 : tensor<1x65x65x384xf32>
}
%collapsed_167 = tensor.collapse_shape %124 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32>
%inserted_slice_168 = tensor.insert_slice %collapsed_167 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
%expanded_169 = tensor.expand_shape %inserted_slice_168 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%125 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_169 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %237 : tensor<1x384x69x69xf32>
}
%126 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%125, %cst_68 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %237 : tensor<1x384x65x65xf32>
}
%127 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%126 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x384xf32>
flow.return %237 : tensor<65x65x1x384xf32>
}
%collapsed_170 = tensor.collapse_shape %127 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%expanded_171 = tensor.expand_shape %collapsed_170 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32>
%128 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_171 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x384xf32>
flow.return %237 : tensor<1x65x65x384xf32>
}
%129 = flow.dispatch.region -> (tensor<1x65x65x64xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%128, %cst_90 : tensor<1x65x65x384xf32>, tensor<1x1x384x64xf32>) outs(%98 : tensor<1x65x65x64xf32>) -> tensor<1x65x65x64xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_166, %237, %cst_70 : tensor<1x65x65x64xf32>, tensor<1x65x65x64xf32>, tensor<64xf32>) outs(%97 : tensor<1x65x65x64xf32>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_9 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
%260 = arith.extsi %259 : i8 to i32
%261 = arith.sitofp %260 : i32 to f32
%262 = arith.mulf %261, %cst_9 : f32
linalg.yield %262 : f32
} -> tensor<1x65x65x64xf32>
flow.return %238 : tensor<1x65x65x64xf32>
}
%130 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%129, %cst_91 : tensor<1x65x65x64xf32>, tensor<1x1x64x384xf32>) outs(%103 : tensor<1x65x65x384xf32>) -> tensor<1x65x65x384xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_32 : tensor<1x65x65x384xf32>, tensor<384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x384xf32>
flow.return %238 : tensor<1x65x65x384xf32>
}
%collapsed_172 = tensor.collapse_shape %130 [[0, 1], [2], [3]] : tensor<1x65x65x384xf32> into tensor<65x65x384xf32>
%inserted_slice_173 = tensor.insert_slice %collapsed_172 into %106[2, 2, 0] [65, 65, 384] [1, 1, 1] : tensor<65x65x384xf32> into tensor<69x69x384xf32>
%expanded_174 = tensor.expand_shape %inserted_slice_173 [[0], [1], [2, 3]] output_shape [69, 69, 1, 384] : tensor<69x69x384xf32> into tensor<69x69x1x384xf32>
%131 = flow.dispatch.region -> (tensor<1x384x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_174 : tensor<69x69x1x384xf32>) outs(%107 : tensor<1x384x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x384x69x69xf32>
flow.return %237 : tensor<1x384x69x69xf32>
}
%132 = flow.dispatch.region -> (tensor<1x384x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%131, %cst_31 : tensor<1x384x69x69xf32>, tensor<384x3x3xf32>) outs(%110 : tensor<1x384x65x65xf32>) -> tensor<1x384x65x65xf32>
flow.return %237 : tensor<1x384x65x65xf32>
}
%133 = flow.dispatch.region -> (tensor<65x65x1x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%132 : tensor<1x384x65x65xf32>) outs(%112 : tensor<65x65x1x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x384xf32>
flow.return %237 : tensor<65x65x1x384xf32>
}
%collapsed_175 = tensor.collapse_shape %133 [[0], [1], [2, 3]] : tensor<65x65x1x384xf32> into tensor<65x65x384xf32>
%expanded_176 = tensor.expand_shape %collapsed_175 [[0, 1], [2], [3]] output_shape [1, 65, 65, 384] : tensor<65x65x384xf32> into tensor<1x65x65x384xf32>
%134 = flow.dispatch.region -> (tensor<1x65x65x384xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_176 : tensor<1x65x65x384xf32>) outs(%102 : tensor<1x65x65x384xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x384xf32>
flow.return %237 : tensor<1x65x65x384xf32>
}
%135 = tensor.empty() : tensor<65x65x96xf32>
%136 = tensor.empty() : tensor<1x65x65x96xf32>
%137 = linalg.fill ins(%cst_14 : f32) outs(%136 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32>
%138 = tensor.empty() : tensor<1x65x65x96xi8>
%139 = flow.dispatch.region -> (tensor<1x65x65x96xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%134, %cst_92 : tensor<1x65x65x384xf32>, tensor<1x1x384x96xf32>) outs(%137 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_33 : tensor<1x65x65x96xf32>, tensor<96xf32>) outs(%138 : tensor<1x65x65x96xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
linalg.yield %247 : i8
} -> tensor<1x65x65x96xi8>
flow.return %238 : tensor<1x65x65x96xi8>
}
%collapsed_177 = tensor.collapse_shape %139 [[0, 1], [2], [3]] : tensor<1x65x65x96xi8> into tensor<65x65x96xi8>
%140 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_177 : tensor<65x65x96xi8>) outs(%135 : tensor<65x65x96xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_8 : f32
linalg.yield %239 : f32
} -> tensor<65x65x96xf32>
%expanded_178 = tensor.expand_shape %140 [[0, 1], [2], [3]] output_shape [1, 65, 65, 96] : tensor<65x65x96xf32> into tensor<1x65x65x96xf32>
%141 = tensor.empty() : tensor<1x65x65x576xf32>
%142 = linalg.fill ins(%cst_14 : f32) outs(%141 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32>
%143 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_178, %cst_93 : tensor<1x65x65x96xf32>, tensor<1x1x96x576xf32>) outs(%142 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_19 : tensor<1x65x65x576xf32>, tensor<576xf32>) outs(%141 : tensor<1x65x65x576xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x576xf32>
flow.return %238 : tensor<1x65x65x576xf32>
}
%collapsed_179 = tensor.collapse_shape %143 [[0, 1], [2], [3]] : tensor<1x65x65x576xf32> into tensor<65x65x576xf32>
%144 = tensor.empty() : tensor<69x69x576xf32>
%145 = linalg.fill ins(%cst_14 : f32) outs(%144 : tensor<69x69x576xf32>) -> tensor<69x69x576xf32>
%inserted_slice_180 = tensor.insert_slice %collapsed_179 into %145[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32>
%expanded_181 = tensor.expand_shape %inserted_slice_180 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32>
%146 = tensor.empty() : tensor<1x576x69x69xf32>
%147 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_181 : tensor<69x69x1x576xf32>) outs(%146 : tensor<1x576x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x576x69x69xf32>
flow.return %237 : tensor<1x576x69x69xf32>
}
%148 = tensor.empty() : tensor<1x576x65x65xf32>
%149 = linalg.fill ins(%cst_14 : f32) outs(%148 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
%150 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%147, %cst_34 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%149 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
flow.return %237 : tensor<1x576x65x65xf32>
}
%151 = tensor.empty() : tensor<65x65x1x576xf32>
%152 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150 : tensor<1x576x65x65xf32>) outs(%151 : tensor<65x65x1x576xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x576xf32>
flow.return %237 : tensor<65x65x1x576xf32>
}
%collapsed_182 = tensor.collapse_shape %152 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32>
%expanded_183 = tensor.expand_shape %collapsed_182 [[0, 1], [2], [3]] output_shape [1, 65, 65, 576] : tensor<65x65x576xf32> into tensor<1x65x65x576xf32>
%153 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_183 : tensor<1x65x65x576xf32>) outs(%141 : tensor<1x65x65x576xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x576xf32>
flow.return %237 : tensor<1x65x65x576xf32>
}
%154 = flow.dispatch.region -> (tensor<1x65x65x96xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%153, %cst_94 : tensor<1x65x65x576xf32>, tensor<1x1x576x96xf32>) outs(%137 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_178, %237, %cst_35 : tensor<1x65x65x96xf32>, tensor<1x65x65x96xf32>, tensor<96xf32>) outs(%138 : tensor<1x65x65x96xi8>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_8 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
linalg.yield %259 : i8
} -> tensor<1x65x65x96xi8>
flow.return %238 : tensor<1x65x65x96xi8>
}
%collapsed_184 = tensor.collapse_shape %154 [[0, 1], [2], [3]] : tensor<1x65x65x96xi8> into tensor<65x65x96xi8>
%155 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_184 : tensor<65x65x96xi8>) outs(%135 : tensor<65x65x96xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_8 : f32
linalg.yield %239 : f32
} -> tensor<65x65x96xf32>
%expanded_185 = tensor.expand_shape %155 [[0, 1], [2], [3]] output_shape [1, 65, 65, 96] : tensor<65x65x96xf32> into tensor<1x65x65x96xf32>
%156 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_185, %cst_95 : tensor<1x65x65x96xf32>, tensor<1x1x96x576xf32>) outs(%142 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_37 : tensor<1x65x65x576xf32>, tensor<576xf32>) outs(%141 : tensor<1x65x65x576xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x576xf32>
flow.return %238 : tensor<1x65x65x576xf32>
}
%collapsed_186 = tensor.collapse_shape %156 [[0, 1], [2], [3]] : tensor<1x65x65x576xf32> into tensor<65x65x576xf32>
%inserted_slice_187 = tensor.insert_slice %collapsed_186 into %145[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32>
%expanded_188 = tensor.expand_shape %inserted_slice_187 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32>
%157 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_188 : tensor<69x69x1x576xf32>) outs(%146 : tensor<1x576x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x576x69x69xf32>
flow.return %237 : tensor<1x576x69x69xf32>
}
%158 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%157, %cst_36 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%149 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
flow.return %237 : tensor<1x576x65x65xf32>
}
%159 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%158 : tensor<1x576x65x65xf32>) outs(%151 : tensor<65x65x1x576xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x576xf32>
flow.return %237 : tensor<65x65x1x576xf32>
}
%collapsed_189 = tensor.collapse_shape %159 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32>
%expanded_190 = tensor.expand_shape %collapsed_189 [[0, 1], [2], [3]] output_shape [1, 65, 65, 576] : tensor<65x65x576xf32> into tensor<1x65x65x576xf32>
%160 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_190 : tensor<1x65x65x576xf32>) outs(%141 : tensor<1x65x65x576xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x576xf32>
flow.return %237 : tensor<1x65x65x576xf32>
}
%161 = flow.dispatch.region -> (tensor<1x65x65x96xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%160, %cst_96 : tensor<1x65x65x576xf32>, tensor<1x1x576x96xf32>) outs(%137 : tensor<1x65x65x96xf32>) -> tensor<1x65x65x96xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_185, %237, %cst_38 : tensor<1x65x65x96xf32>, tensor<1x65x65x96xf32>, tensor<96xf32>) outs(%136 : tensor<1x65x65x96xf32>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_8 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
%260 = arith.extsi %259 : i8 to i32
%261 = arith.sitofp %260 : i32 to f32
%262 = arith.mulf %261, %cst_8 : f32
linalg.yield %262 : f32
} -> tensor<1x65x65x96xf32>
flow.return %238 : tensor<1x65x65x96xf32>
}
%162 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%161, %cst_97 : tensor<1x65x65x96xf32>, tensor<1x1x96x576xf32>) outs(%142 : tensor<1x65x65x576xf32>) -> tensor<1x65x65x576xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_40 : tensor<1x65x65x576xf32>, tensor<576xf32>) outs(%141 : tensor<1x65x65x576xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x576xf32>
flow.return %238 : tensor<1x65x65x576xf32>
}
%collapsed_191 = tensor.collapse_shape %162 [[0, 1], [2], [3]] : tensor<1x65x65x576xf32> into tensor<65x65x576xf32>
%inserted_slice_192 = tensor.insert_slice %collapsed_191 into %145[2, 2, 0] [65, 65, 576] [1, 1, 1] : tensor<65x65x576xf32> into tensor<69x69x576xf32>
%expanded_193 = tensor.expand_shape %inserted_slice_192 [[0], [1], [2, 3]] output_shape [69, 69, 1, 576] : tensor<69x69x576xf32> into tensor<69x69x1x576xf32>
%163 = flow.dispatch.region -> (tensor<1x576x69x69xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_193 : tensor<69x69x1x576xf32>) outs(%146 : tensor<1x576x69x69xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x576x69x69xf32>
flow.return %237 : tensor<1x576x69x69xf32>
}
%164 = flow.dispatch.region -> (tensor<1x576x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<2> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%163, %cst_39 : tensor<1x576x69x69xf32>, tensor<576x3x3xf32>) outs(%149 : tensor<1x576x65x65xf32>) -> tensor<1x576x65x65xf32>
flow.return %237 : tensor<1x576x65x65xf32>
}
%165 = flow.dispatch.region -> (tensor<65x65x1x576xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164 : tensor<1x576x65x65xf32>) outs(%151 : tensor<65x65x1x576xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x576xf32>
flow.return %237 : tensor<65x65x1x576xf32>
}
%collapsed_194 = tensor.collapse_shape %165 [[0], [1], [2, 3]] : tensor<65x65x1x576xf32> into tensor<65x65x576xf32>
%expanded_195 = tensor.expand_shape %collapsed_194 [[0, 1], [2], [3]] output_shape [1, 65, 65, 576] : tensor<65x65x576xf32> into tensor<1x65x65x576xf32>
%166 = flow.dispatch.region -> (tensor<1x65x65x576xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_195 : tensor<1x65x65x576xf32>) outs(%141 : tensor<1x65x65x576xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x576xf32>
flow.return %237 : tensor<1x65x65x576xf32>
}
%167 = tensor.empty() : tensor<65x65x160xf32>
%168 = tensor.empty() : tensor<1x65x65x160xf32>
%169 = linalg.fill ins(%cst_14 : f32) outs(%168 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32>
%170 = tensor.empty() : tensor<1x65x65x160xi8>
%171 = flow.dispatch.region -> (tensor<1x65x65x160xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%166, %cst_98 : tensor<1x65x65x576xf32>, tensor<1x1x576x160xf32>) outs(%169 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_41 : tensor<1x65x65x160xf32>, tensor<160xf32>) outs(%170 : tensor<1x65x65x160xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
linalg.yield %247 : i8
} -> tensor<1x65x65x160xi8>
flow.return %238 : tensor<1x65x65x160xi8>
}
%collapsed_196 = tensor.collapse_shape %171 [[0, 1], [2], [3]] : tensor<1x65x65x160xi8> into tensor<65x65x160xi8>
%172 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_196 : tensor<65x65x160xi8>) outs(%167 : tensor<65x65x160xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_8 : f32
linalg.yield %239 : f32
} -> tensor<65x65x160xf32>
%expanded_197 = tensor.expand_shape %172 [[0, 1], [2], [3]] output_shape [1, 65, 65, 160] : tensor<65x65x160xf32> into tensor<1x65x65x160xf32>
%173 = tensor.empty() : tensor<1x65x65x960xf32>
%174 = linalg.fill ins(%cst_14 : f32) outs(%173 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32>
%175 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_197, %cst_99 : tensor<1x65x65x160xf32>, tensor<1x1x160x960xf32>) outs(%174 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_20 : tensor<1x65x65x960xf32>, tensor<960xf32>) outs(%173 : tensor<1x65x65x960xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x960xf32>
flow.return %238 : tensor<1x65x65x960xf32>
}
%collapsed_198 = tensor.collapse_shape %175 [[0, 1], [2], [3]] : tensor<1x65x65x960xf32> into tensor<65x65x960xf32>
%176 = tensor.empty() : tensor<73x73x960xf32>
%177 = linalg.fill ins(%cst_14 : f32) outs(%176 : tensor<73x73x960xf32>) -> tensor<73x73x960xf32>
%inserted_slice_199 = tensor.insert_slice %collapsed_198 into %177[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32>
%expanded_200 = tensor.expand_shape %inserted_slice_199 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32>
%178 = tensor.empty() : tensor<1x960x73x73xf32>
%179 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_200 : tensor<73x73x1x960xf32>) outs(%178 : tensor<1x960x73x73xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x960x73x73xf32>
flow.return %237 : tensor<1x960x73x73xf32>
}
%180 = tensor.empty() : tensor<1x960x65x65xf32>
%181 = linalg.fill ins(%cst_14 : f32) outs(%180 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
%182 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%179, %cst_42 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%181 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
flow.return %237 : tensor<1x960x65x65xf32>
}
%183 = tensor.empty() : tensor<65x65x1x960xf32>
%184 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%182 : tensor<1x960x65x65xf32>) outs(%183 : tensor<65x65x1x960xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x960xf32>
flow.return %237 : tensor<65x65x1x960xf32>
}
%collapsed_201 = tensor.collapse_shape %184 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32>
%expanded_202 = tensor.expand_shape %collapsed_201 [[0, 1], [2], [3]] output_shape [1, 65, 65, 960] : tensor<65x65x960xf32> into tensor<1x65x65x960xf32>
%185 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_202 : tensor<1x65x65x960xf32>) outs(%173 : tensor<1x65x65x960xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x960xf32>
flow.return %237 : tensor<1x65x65x960xf32>
}
%186 = flow.dispatch.region -> (tensor<1x65x65x160xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%185, %cst_100 : tensor<1x65x65x960xf32>, tensor<1x1x960x160xf32>) outs(%169 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_197, %237, %cst_43 : tensor<1x65x65x160xf32>, tensor<1x65x65x160xf32>, tensor<160xf32>) outs(%170 : tensor<1x65x65x160xi8>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: i8):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_10 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_10 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_8 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
linalg.yield %259 : i8
} -> tensor<1x65x65x160xi8>
flow.return %238 : tensor<1x65x65x160xi8>
}
%collapsed_203 = tensor.collapse_shape %186 [[0, 1], [2], [3]] : tensor<1x65x65x160xi8> into tensor<65x65x160xi8>
%187 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_203 : tensor<65x65x160xi8>) outs(%167 : tensor<65x65x160xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_8 : f32
linalg.yield %239 : f32
} -> tensor<65x65x160xf32>
%expanded_204 = tensor.expand_shape %187 [[0, 1], [2], [3]] output_shape [1, 65, 65, 160] : tensor<65x65x160xf32> into tensor<1x65x65x160xf32>
%188 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_204, %cst_101 : tensor<1x65x65x160xf32>, tensor<1x1x160x960xf32>) outs(%174 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_45 : tensor<1x65x65x960xf32>, tensor<960xf32>) outs(%173 : tensor<1x65x65x960xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x960xf32>
flow.return %238 : tensor<1x65x65x960xf32>
}
%collapsed_205 = tensor.collapse_shape %188 [[0, 1], [2], [3]] : tensor<1x65x65x960xf32> into tensor<65x65x960xf32>
%inserted_slice_206 = tensor.insert_slice %collapsed_205 into %177[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32>
%expanded_207 = tensor.expand_shape %inserted_slice_206 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32>
%189 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_207 : tensor<73x73x1x960xf32>) outs(%178 : tensor<1x960x73x73xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x960x73x73xf32>
flow.return %237 : tensor<1x960x73x73xf32>
}
%190 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%189, %cst_44 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%181 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
flow.return %237 : tensor<1x960x65x65xf32>
}
%191 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%190 : tensor<1x960x65x65xf32>) outs(%183 : tensor<65x65x1x960xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x960xf32>
flow.return %237 : tensor<65x65x1x960xf32>
}
%collapsed_208 = tensor.collapse_shape %191 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32>
%expanded_209 = tensor.expand_shape %collapsed_208 [[0, 1], [2], [3]] output_shape [1, 65, 65, 960] : tensor<65x65x960xf32> into tensor<1x65x65x960xf32>
%192 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_209 : tensor<1x65x65x960xf32>) outs(%173 : tensor<1x65x65x960xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x960xf32>
flow.return %237 : tensor<1x65x65x960xf32>
}
%193 = flow.dispatch.region -> (tensor<1x65x65x160xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%192, %cst_102 : tensor<1x65x65x960xf32>, tensor<1x1x960x160xf32>) outs(%169 : tensor<1x65x65x160xf32>) -> tensor<1x65x65x160xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_204, %237, %cst_46 : tensor<1x65x65x160xf32>, tensor<1x65x65x160xf32>, tensor<160xf32>) outs(%168 : tensor<1x65x65x160xf32>) {
^bb0(%in: f32, %in_229: f32, %in_230: f32, %out: f32):
%239 = arith.addf %in_229, %in_230 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
%251 = arith.addf %in, %250 : f32
%252 = arith.divf %251, %cst_8 : f32
%253 = math.round %252 : f32
%254 = arith.addf %253, %cst_14 : f32
%255 = arith.cmpf ult, %254, %cst_16 : f32
%256 = arith.cmpf ugt, %254, %cst_15 : f32
%257 = arith.select %255, %cst_16, %254 : f32
%258 = arith.select %256, %cst_15, %257 : f32
%259 = arith.fptosi %258 : f32 to i8
%260 = arith.extsi %259 : i8 to i32
%261 = arith.sitofp %260 : i32 to f32
%262 = arith.mulf %261, %cst_8 : f32
linalg.yield %262 : f32
} -> tensor<1x65x65x160xf32>
flow.return %238 : tensor<1x65x65x160xf32>
}
%194 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%193, %cst_103 : tensor<1x65x65x160xf32>, tensor<1x1x160x960xf32>) outs(%174 : tensor<1x65x65x960xf32>) -> tensor<1x65x65x960xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_48 : tensor<1x65x65x960xf32>, tensor<960xf32>) outs(%173 : tensor<1x65x65x960xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ult, %239, %cst_14 : f32
%241 = arith.select %240, %cst_14, %239 : f32
%242 = arith.cmpf ugt, %241, %cst_2 : f32
%243 = arith.select %242, %cst_2, %241 : f32
%244 = arith.divf %243, %cst_10 : f32
%245 = math.round %244 : f32
%246 = arith.addf %245, %cst_14 : f32
%247 = arith.cmpf ult, %246, %cst_16 : f32
%248 = arith.cmpf ugt, %246, %cst_15 : f32
%249 = arith.select %247, %cst_16, %246 : f32
%250 = arith.select %248, %cst_15, %249 : f32
%251 = arith.fptosi %250 : f32 to i8
%252 = arith.extsi %251 : i8 to i32
%253 = arith.sitofp %252 : i32 to f32
%254 = arith.mulf %253, %cst_10 : f32
linalg.yield %254 : f32
} -> tensor<1x65x65x960xf32>
flow.return %238 : tensor<1x65x65x960xf32>
}
%collapsed_210 = tensor.collapse_shape %194 [[0, 1], [2], [3]] : tensor<1x65x65x960xf32> into tensor<65x65x960xf32>
%inserted_slice_211 = tensor.insert_slice %collapsed_210 into %177[4, 4, 0] [65, 65, 960] [1, 1, 1] : tensor<65x65x960xf32> into tensor<73x73x960xf32>
%expanded_212 = tensor.expand_shape %inserted_slice_211 [[0], [1], [2, 3]] output_shape [73, 73, 1, 960] : tensor<73x73x960xf32> into tensor<73x73x1x960xf32>
%195 = flow.dispatch.region -> (tensor<1x960x73x73xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_212 : tensor<73x73x1x960xf32>) outs(%178 : tensor<1x960x73x73xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x960x73x73xf32>
flow.return %237 : tensor<1x960x73x73xf32>
}
%196 = flow.dispatch.region -> (tensor<1x960x65x65xf32>) {
%237 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<4> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%195, %cst_47 : tensor<1x960x73x73xf32>, tensor<960x3x3xf32>) outs(%181 : tensor<1x960x65x65xf32>) -> tensor<1x960x65x65xf32>
flow.return %237 : tensor<1x960x65x65xf32>
}
%197 = flow.dispatch.region -> (tensor<65x65x1x960xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%196 : tensor<1x960x65x65xf32>) outs(%183 : tensor<65x65x1x960xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.cmpf ult, %in, %cst_14 : f32
%239 = arith.select %238, %cst_14, %in : f32
%240 = arith.cmpf ugt, %239, %cst_2 : f32
%241 = arith.select %240, %cst_2, %239 : f32
linalg.yield %241 : f32
} -> tensor<65x65x1x960xf32>
flow.return %237 : tensor<65x65x1x960xf32>
}
%collapsed_213 = tensor.collapse_shape %197 [[0], [1], [2, 3]] : tensor<65x65x1x960xf32> into tensor<65x65x960xf32>
%expanded_214 = tensor.expand_shape %collapsed_213 [[0, 1], [2], [3]] output_shape [1, 65, 65, 960] : tensor<65x65x960xf32> into tensor<1x65x65x960xf32>
%198 = flow.dispatch.region -> (tensor<1x65x65x960xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_214 : tensor<1x65x65x960xf32>) outs(%173 : tensor<1x65x65x960xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_10 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_10 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x960xf32>
flow.return %237 : tensor<1x65x65x960xf32>
}
%199 = tensor.empty() : tensor<65x65x320xf32>
%200 = tensor.empty() : tensor<1x65x65x320xf32>
%201 = linalg.fill ins(%cst_14 : f32) outs(%200 : tensor<1x65x65x320xf32>) -> tensor<1x65x65x320xf32>
%202 = tensor.empty() : tensor<1x65x65x320xi8>
%203 = flow.dispatch.region -> (tensor<1x65x65x320xi8>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%198, %cst_104 : tensor<1x65x65x960xf32>, tensor<1x1x960x320xf32>) outs(%201 : tensor<1x65x65x320xf32>) -> tensor<1x65x65x320xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_49 : tensor<1x65x65x320xf32>, tensor<320xf32>) outs(%202 : tensor<1x65x65x320xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
linalg.yield %247 : i8
} -> tensor<1x65x65x320xi8>
flow.return %238 : tensor<1x65x65x320xi8>
}
%collapsed_215 = tensor.collapse_shape %203 [[0, 1], [2], [3]] : tensor<1x65x65x320xi8> into tensor<65x65x320xi8>
%204 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%collapsed_215 : tensor<65x65x320xi8>) outs(%199 : tensor<65x65x320xf32>) {
^bb0(%in: i8, %out: f32):
%237 = arith.extsi %in : i8 to i32
%238 = arith.sitofp %237 : i32 to f32
%239 = arith.mulf %238, %cst_8 : f32
linalg.yield %239 : f32
} -> tensor<65x65x320xf32>
%expanded_216 = tensor.expand_shape %204 [[0, 1], [2], [3]] output_shape [1, 65, 65, 320] : tensor<65x65x320xf32> into tensor<1x65x65x320xf32>
%205 = tensor.empty() : tensor<320x65x65xf32>
%206 = flow.dispatch.region -> (tensor<320x65x65xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%204 : tensor<65x65x320xf32>) outs(%205 : tensor<320x65x65xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<320x65x65xf32>
flow.return %237 : tensor<320x65x65xf32>
}
%expanded_217 = tensor.expand_shape %206 [[0, 1], [2], [3]] output_shape [1, 320, 65, 65] : tensor<320x65x65xf32> into tensor<1x320x65x65xf32>
%207 = tensor.empty() : tensor<1x320x1x1xf32>
%208 = linalg.fill ins(%cst_14 : f32) outs(%207 : tensor<1x320x1x1xf32>) -> tensor<1x320x1x1xf32>
%209 = tensor.empty() : tensor<65x65xf32>
%210 = flow.dispatch.region -> (tensor<1x320x1x1xf32>) {
%237 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<65> : vector<2xi64>} ins(%expanded_217, %209 : tensor<1x320x65x65xf32>, tensor<65x65xf32>) outs(%208 : tensor<1x320x1x1xf32>) -> tensor<1x320x1x1xf32>
flow.return %237 : tensor<1x320x1x1xf32>
}
%collapsed_218 = tensor.collapse_shape %210 [[0, 1, 2, 3]] : tensor<1x320x1x1xf32> into tensor<320xf32>
%211 = tensor.empty() : tensor<1x65x65x256xf32>
%212 = linalg.fill ins(%cst_14 : f32) outs(%211 : tensor<1x65x65x256xf32>) -> tensor<1x65x65x256xf32>
%expanded_219 = tensor.expand_shape %collapsed_218 [[0, 1, 2, 3]] output_shape [1, 1, 1, 320] : tensor<320xf32> into tensor<1x1x1x320xf32>
%213 = tensor.empty() : tensor<1x1x1x320xf32>
%214 = flow.dispatch.region -> (tensor<1x1x1x320xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_219 : tensor<1x1x1x320xf32>) outs(%213 : tensor<1x1x1x320xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_17 : f32
%239 = arith.mulf %238, %cst_1 : f32
%240 = arith.divf %239, %cst_8 : f32
%241 = math.round %240 : f32
%242 = arith.addf %241, %cst_14 : f32
%243 = arith.cmpf ult, %242, %cst_16 : f32
%244 = arith.cmpf ugt, %242, %cst_15 : f32
%245 = arith.select %243, %cst_16, %242 : f32
%246 = arith.select %244, %cst_15, %245 : f32
%247 = arith.fptosi %246 : f32 to i8
%248 = arith.extsi %247 : i8 to i32
%249 = arith.sitofp %248 : i32 to f32
%250 = arith.mulf %249, %cst_8 : f32
linalg.yield %250 : f32
} -> tensor<1x1x1x320xf32>
flow.return %237 : tensor<1x1x1x320xf32>
}
%215 = tensor.empty() : tensor<256x65x65xf32>
%216 = tensor.empty() : tensor<256x1x65x65xf32>
%217 = flow.dispatch.region -> (tensor<256x1x65x65xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%expanded_216, %cst_105 : tensor<1x65x65x320xf32>, tensor<1x1x320x256xf32>) outs(%212 : tensor<1x65x65x256xf32>) -> tensor<1x65x65x256xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_24 : tensor<1x65x65x256xf32>, tensor<256xf32>) outs(%216 : tensor<256x1x65x65xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ugt, %239, %cst_14 : f32
%241 = arith.select %240, %239, %cst_14 : f32
%242 = arith.divf %241, %cst_11 : f32
%243 = math.round %242 : f32
%244 = arith.addf %243, %cst_14 : f32
%245 = arith.cmpf ult, %244, %cst_16 : f32
%246 = arith.cmpf ugt, %244, %cst_15 : f32
%247 = arith.select %245, %cst_16, %244 : f32
%248 = arith.select %246, %cst_15, %247 : f32
%249 = arith.fptosi %248 : f32 to i8
%250 = arith.extsi %249 : i8 to i32
%251 = arith.sitofp %250 : i32 to f32
%252 = arith.mulf %251, %cst_11 : f32
linalg.yield %252 : f32
} -> tensor<256x1x65x65xf32>
flow.return %238 : tensor<256x1x65x65xf32>
}
%collapsed_220 = tensor.collapse_shape %217 [[0], [1, 2], [3]] : tensor<256x1x65x65xf32> into tensor<256x65x65xf32>
%218 = tensor.empty() : tensor<1x1x1x256xf32>
%219 = linalg.fill ins(%cst_14 : f32) outs(%218 : tensor<1x1x1x256xf32>) -> tensor<1x1x1x256xf32>
%220 = flow.dispatch.region -> (tensor<1x1x1x256xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%214, %cst_106 : tensor<1x1x1x320xf32>, tensor<1x1x320x256xf32>) outs(%219 : tensor<1x1x1x256xf32>) -> tensor<1x1x1x256xf32>
flow.return %237 : tensor<1x1x1x256xf32>
}
%collapsed_221 = tensor.collapse_shape %220 [[0, 1, 2, 3]] : tensor<1x1x1x256xf32> into tensor<256xf32>
%expanded_222 = tensor.expand_shape %collapsed_221 [[0, 1, 2, 3]] output_shape [1, 256, 1, 1] : tensor<256xf32> into tensor<1x256x1x1xf32>
%221 = tensor.empty() : tensor<1x256x1x1xf32>
%222 = flow.dispatch.region -> (tensor<1x256x1x1xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_222, %cst_0 : tensor<1x256x1x1xf32>, tensor<1x256x1x1xf32>) outs(%221 : tensor<1x256x1x1xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%238 = arith.addf %in, %in_229 : f32
%239 = arith.cmpf ugt, %238, %cst_14 : f32
%240 = arith.select %239, %238, %cst_14 : f32
%241 = arith.divf %240, %cst_11 : f32
%242 = math.round %241 : f32
%243 = arith.addf %242, %cst_14 : f32
%244 = arith.cmpf ult, %243, %cst_16 : f32
%245 = arith.cmpf ugt, %243, %cst_15 : f32
%246 = arith.select %244, %cst_16, %243 : f32
%247 = arith.select %245, %cst_15, %246 : f32
%248 = arith.fptosi %247 : f32 to i8
%249 = arith.extsi %248 : i8 to i32
%250 = arith.sitofp %249 : i32 to f32
%251 = arith.mulf %250, %cst_11 : f32
linalg.yield %251 : f32
} -> tensor<1x256x1x1xf32>
flow.return %237 : tensor<1x256x1x1xf32>
}
%223 = flow.dispatch.region -> (tensor<256x65x65xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} outs(%215 : tensor<256x65x65xf32>) {
^bb0(%out: f32):
%238 = linalg.index 0 : index
%239 = linalg.index 1 : index
%240 = linalg.index 2 : index
%241 = arith.index_cast %239 : index to i64
%242 = arith.sitofp %241 : i64 to f32
%243 = arith.addf %242, %cst_12 : f32
%244 = arith.divf %243, %cst_6 : f32
%245 = arith.subf %244, %cst_12 : f32
%246 = arith.maximumf %245, %cst_14 : f32
%247 = arith.minimumf %246, %cst_4 : f32
%248 = arith.minimumf %246, %cst_14 : f32
%249 = math.floor %247 : f32
%250 = arith.addf %247, %cst_13 : f32
%251 = math.floor %250 : f32
%252 = arith.fptosi %249 : f32 to i64
%253 = arith.index_cast %252 : i64 to index
%254 = arith.fptosi %251 : f32 to i64
%255 = arith.index_cast %254 : i64 to index
%256 = arith.index_cast %240 : index to i64
%257 = arith.sitofp %256 : i64 to f32
%258 = arith.addf %257, %cst_12 : f32
%259 = arith.divf %258, %cst_6 : f32
%260 = arith.subf %259, %cst_12 : f32
%261 = arith.maximumf %260, %cst_14 : f32
%262 = arith.minimumf %261, %cst_4 : f32
%263 = arith.minimumf %261, %cst_14 : f32
%264 = math.floor %262 : f32
%265 = arith.addf %262, %cst_13 : f32
%266 = math.floor %265 : f32
%267 = arith.fptosi %264 : f32 to i64
%268 = arith.index_cast %267 : i64 to index
%269 = arith.fptosi %266 : f32 to i64
%270 = arith.index_cast %269 : i64 to index
%extracted = tensor.extract %222[%c0, %238, %253, %268] : tensor<1x256x1x1xf32>
%extracted_229 = tensor.extract %222[%c0, %238, %253, %270] : tensor<1x256x1x1xf32>
%extracted_230 = tensor.extract %222[%c0, %238, %255, %268] : tensor<1x256x1x1xf32>
%extracted_231 = tensor.extract %222[%c0, %238, %255, %270] : tensor<1x256x1x1xf32>
%271 = arith.subf %251, %248 : f32
%272 = arith.subf %248, %249 : f32
%273 = arith.subf %266, %263 : f32
%274 = arith.subf %263, %264 : f32
%275 = arith.mulf %273, %extracted : f32
%276 = arith.mulf %274, %extracted_229 : f32
%277 = arith.addf %275, %276 : f32
%278 = arith.mulf %271, %277 : f32
%279 = arith.mulf %273, %extracted_230 : f32
%280 = arith.mulf %274, %extracted_231 : f32
%281 = arith.addf %279, %280 : f32
%282 = arith.mulf %272, %281 : f32
%283 = arith.addf %278, %282 : f32
%284 = arith.divf %283, %cst_11 : f32
%285 = math.round %284 : f32
%286 = arith.addf %285, %cst_14 : f32
%287 = arith.cmpf ult, %286, %cst_16 : f32
%288 = arith.cmpf ugt, %286, %cst_15 : f32
%289 = arith.select %287, %cst_16, %286 : f32
%290 = arith.select %288, %cst_15, %289 : f32
%291 = arith.fptosi %290 : f32 to i8
%292 = arith.extsi %291 : i8 to i32
%293 = arith.sitofp %292 : i32 to f32
%294 = arith.mulf %293, %cst_11 : f32
linalg.yield %294 : f32
} -> tensor<256x65x65xf32>
flow.return %237 : tensor<256x65x65xf32>
}
%224 = tensor.empty() : tensor<1x512x65x65xf32>
%inserted_slice_223 = tensor.insert_slice %223 into %224[0, 0, 0, 0] [1, 256, 65, 65] [1, 1, 1, 1] : tensor<256x65x65xf32> into tensor<1x512x65x65xf32>
%inserted_slice_224 = tensor.insert_slice %collapsed_220 into %inserted_slice_223[0, 256, 0, 0] [1, 256, 65, 65] [1, 1, 1, 1] : tensor<256x65x65xf32> into tensor<1x512x65x65xf32>
%collapsed_225 = tensor.collapse_shape %inserted_slice_224 [[0, 1], [2], [3]] : tensor<1x512x65x65xf32> into tensor<512x65x65xf32>
%expanded_226 = tensor.expand_shape %collapsed_225 [[0], [1, 2], [3]] output_shape [512, 1, 65, 65] : tensor<512x65x65xf32> into tensor<512x1x65x65xf32>
%225 = tensor.empty() : tensor<1x65x65x512xf32>
%226 = flow.dispatch.region -> (tensor<1x65x65x512xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_226 : tensor<512x1x65x65xf32>) outs(%225 : tensor<1x65x65x512xf32>) {
^bb0(%in: f32, %out: f32):
%238 = arith.divf %in, %cst_11 : f32
%239 = math.round %238 : f32
%240 = arith.addf %239, %cst_14 : f32
%241 = arith.cmpf ult, %240, %cst_16 : f32
%242 = arith.cmpf ugt, %240, %cst_15 : f32
%243 = arith.select %241, %cst_16, %240 : f32
%244 = arith.select %242, %cst_15, %243 : f32
%245 = arith.fptosi %244 : f32 to i8
%246 = arith.extsi %245 : i8 to i32
%247 = arith.sitofp %246 : i32 to f32
%248 = arith.mulf %247, %cst_11 : f32
linalg.yield %248 : f32
} -> tensor<1x65x65x512xf32>
flow.return %237 : tensor<1x65x65x512xf32>
}
%227 = flow.dispatch.region -> (tensor<1x65x65x256xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%226, %cst_107 : tensor<1x65x65x512xf32>, tensor<1x1x512x256xf32>) outs(%212 : tensor<1x65x65x256xf32>) -> tensor<1x65x65x256xf32>
%238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%237, %cst_71 : tensor<1x65x65x256xf32>, tensor<256xf32>) outs(%211 : tensor<1x65x65x256xf32>) {
^bb0(%in: f32, %in_229: f32, %out: f32):
%239 = arith.addf %in, %in_229 : f32
%240 = arith.cmpf ugt, %239, %cst_14 : f32
%241 = arith.select %240, %239, %cst_14 : f32
%242 = arith.divf %241, %cst_10 : f32
%243 = math.round %242 : f32
%244 = arith.addf %243, %cst_14 : f32
%245 = arith.cmpf ult, %244, %cst_16 : f32
%246 = arith.cmpf ugt, %244, %cst_15 : f32
%247 = arith.select %245, %cst_16, %244 : f32
%248 = arith.select %246, %cst_15, %247 : f32
%249 = arith.fptosi %248 : f32 to i8
%250 = arith.extsi %249 : i8 to i32
%251 = arith.sitofp %250 : i32 to f32
%252 = arith.mulf %251, %cst_10 : f32
linalg.yield %252 : f32
} -> tensor<1x65x65x256xf32>
flow.return %238 : tensor<1x65x65x256xf32>
}
%228 = tensor.empty() : tensor<1x65x65x21xf32>
%229 = linalg.fill ins(%cst_14 : f32) outs(%228 : tensor<1x65x65x21xf32>) -> tensor<1x65x65x21xf32>
%230 = flow.dispatch.region -> (tensor<1x65x65x21xf32>) {
%237 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%227, %cst_108 : tensor<1x65x65x256xf32>, tensor<1x1x256x21xf32>) outs(%229 : tensor<1x65x65x21xf32>) -> tensor<1x65x65x21xf32>
flow.return %237 : tensor<1x65x65x21xf32>
}
%collapsed_227 = tensor.collapse_shape %230 [[0, 1], [2], [3]] : tensor<1x65x65x21xf32> into tensor<65x65x21xf32>
%expanded_228 = tensor.expand_shape %collapsed_227 [[0], [1], [2, 3]] output_shape [65, 65, 1, 21] : tensor<65x65x21xf32> into tensor<65x65x1x21xf32>
%231 = tensor.empty() : tensor<1x21x65x65xi8>
%232 = flow.dispatch.region -> (tensor<1x21x65x65xi8>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d2, d3, d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_228, %cst : tensor<65x65x1x21xf32>, tensor<1x21xf32>) outs(%231 : tensor<1x21x65x65xi8>) {
^bb0(%in: f32, %in_229: f32, %out: i8):
%238 = arith.addf %in, %in_229 : f32
%239 = arith.divf %238, %cst_9 : f32
%240 = math.round %239 : f32
%241 = arith.addf %240, %cst_14 : f32
%242 = arith.cmpf ult, %241, %cst_16 : f32
%243 = arith.cmpf ugt, %241, %cst_15 : f32
%244 = arith.select %242, %cst_16, %241 : f32
%245 = arith.select %243, %cst_15, %244 : f32
%246 = arith.fptosi %245 : f32 to i8
linalg.yield %246 : i8
} -> tensor<1x21x65x65xi8>
flow.return %237 : tensor<1x21x65x65xi8>
}
%233 = tensor.empty() : tensor<1x513x513x21xf32>
%234 = flow.dispatch.region -> (tensor<1x513x513x21xf32>) {
%237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%233 : tensor<1x513x513x21xf32>) {
^bb0(%out: f32):
%238 = linalg.index 0 : index
%239 = linalg.index 1 : index
%240 = linalg.index 2 : index
%241 = linalg.index 3 : index
%242 = affine.apply affine_map<(d0, d1) -> (d0 + d1 * 513)>(%239, %238)
%243 = arith.index_cast %242 : index to i64
%244 = arith.sitofp %243 : i64 to f32
%245 = arith.addf %244, %cst_12 : f32
%246 = arith.divf %245, %cst_5 : f32
%247 = arith.subf %246, %cst_12 : f32
%248 = arith.maximumf %247, %cst_14 : f32
%249 = arith.minimumf %248, %cst_3 : f32
%250 = math.floor %249 : f32
%251 = arith.addf %249, %cst_13 : f32
%252 = math.floor %251 : f32
%253 = arith.fptosi %250 : f32 to i64
%254 = arith.index_cast %253 : i64 to index
%255 = arith.fptosi %252 : f32 to i64
%256 = arith.index_cast %255 : i64 to index
%257 = arith.index_cast %240 : index to i64
%258 = arith.sitofp %257 : i64 to f32
%259 = arith.addf %258, %cst_12 : f32
%260 = arith.divf %259, %cst_5 : f32
%261 = arith.subf %260, %cst_12 : f32
%262 = arith.maximumf %261, %cst_14 : f32
%263 = arith.minimumf %262, %cst_3 : f32
%264 = math.floor %263 : f32
%265 = arith.addf %263, %cst_13 : f32
%266 = math.floor %265 : f32
%267 = arith.fptosi %264 : f32 to i64
%268 = arith.index_cast %267 : i64 to index
%269 = arith.fptosi %266 : f32 to i64
%270 = arith.index_cast %269 : i64 to index
%extracted = tensor.extract %232[%c0, %241, %254, %268] : tensor<1x21x65x65xi8>
%271 = arith.extsi %extracted : i8 to i32
%272 = arith.sitofp %271 : i32 to f32
%273 = arith.mulf %272, %cst_9 : f32
%extracted_229 = tensor.extract %232[%c0, %241, %254, %270] : tensor<1x21x65x65xi8>
%274 = arith.extsi %extracted_229 : i8 to i32
%275 = arith.sitofp %274 : i32 to f32
%276 = arith.mulf %275, %cst_9 : f32
%extracted_230 = tensor.extract %232[%c0, %241, %256, %268] : tensor<1x21x65x65xi8>
%277 = arith.extsi %extracted_230 : i8 to i32
%278 = arith.sitofp %277 : i32 to f32
%279 = arith.mulf %278, %cst_9 : f32
%extracted_231 = tensor.extract %232[%c0, %241, %256, %270] : tensor<1x21x65x65xi8>
%280 = arith.extsi %extracted_231 : i8 to i32
%281 = arith.sitofp %280 : i32 to f32
%282 = arith.mulf %281, %cst_9 : f32
%283 = arith.subf %252, %249 : f32
%284 = arith.subf %249, %250 : f32
%285 = arith.subf %266, %263 : f32
%286 = arith.subf %263, %264 : f32
%287 = arith.mulf %285, %273 : f32
%288 = arith.mulf %286, %276 : f32
%289 = arith.addf %287, %288 : f32
%290 = arith.mulf %283, %289 : f32
%291 = arith.mulf %285, %279 : f32
%292 = arith.mulf %286, %282 : f32
%293 = arith.addf %291, %292 : f32
%294 = arith.mulf %284, %293 : f32
%295 = arith.addf %290, %294 : f32
%296 = arith.divf %295, %cst_9 : f32
%297 = math.round %296 : f32
%298 = arith.addf %297, %cst_14 : f32
%299 = arith.cmpf ult, %298, %cst_16 : f32
%300 = arith.cmpf ugt, %298, %cst_15 : f32
%301 = arith.select %299, %cst_16, %298 : f32
%302 = arith.select %300, %cst_15, %301 : f32
%303 = arith.fptosi %302 : f32 to i8
%304 = arith.extsi %303 : i8 to i32
%305 = arith.sitofp %304 : i32 to f32
%306 = arith.mulf %305, %cst_9 : f32
%307 = arith.divf %306, %cst_9 : f32
%308 = math.round %307 : f32
%309 = arith.addf %308, %cst_14 : f32
%310 = arith.cmpf ult, %309, %cst_16 : f32
%311 = arith.cmpf ugt, %309, %cst_15 : f32
%312 = arith.select %310, %cst_16, %309 : f32
%313 = arith.select %311, %cst_15, %312 : f32
%314 = arith.fptosi %313 : f32 to i8
%315 = arith.extsi %314 : i8 to i32
%316 = arith.sitofp %315 : i32 to f32
%317 = arith.mulf %316, %cst_9 : f32
linalg.yield %317 : f32
} -> tensor<1x513x513x21xf32>
flow.return %237 : tensor<1x513x513x21xf32>
}
%235 = hal.tensor.barrier join(%234 : tensor<1x513x513x21xf32>) => %arg2 : !hal.fence
%236 = hal.tensor.export %235 : tensor<1x513x513x21xf32> -> !hal.buffer_view
util.return %236 : !hal.buffer_view
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment