Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vedranmiletic/6be5ff2b34fb7817797183bd2606d551 to your computer and use it in GitHub Desktop.
Save vedranmiletic/6be5ff2b34fb7817797183bd2606d551 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
:-) GROMACS - gmx mdrun, 2016.2-dev-20170105-4feb0be (-:
GROMACS is written by:
Emile Apol Rossen Apostolov Herman J.C. Berendsen Par Bjelkmar
Aldert van Buuren Rudi van Drunen Anton Feenstra Gerrit Groenhof
Christoph Junghans Anca Hamuraru Vincent Hindriksen Dimitrios Karkoulis
Peter Kasson Jiri Kraus Carsten Kutzner Per Larsson
Justin A. Lemkul Magnus Lundborg Pieter Meulenhoff Erik Marklund
Teemu Murtola Szilard Pall Sander Pronk Roland Schulz
Alexey Shvetsov Michael Shirts Alfons Sijbers Peter Tieleman
Teemu Virolainen Christian Wennberg Maarten Wolf
and the project leaders:
Mark Abraham, Berk Hess, Erik Lindahl, and David van der Spoel
Copyright (c) 1991-2000, University of Groningen, The Netherlands.
Copyright (c) 2001-2015, The GROMACS development team at
Uppsala University, Stockholm University and
the Royal Institute of Technology, Sweden.
check out http://www.gromacs.org for more information.
GROMACS is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1
of the License, or (at your option) any later version.
GROMACS: gmx mdrun, version 2016.2-dev-20170105-4feb0be
Executable: /usr/local/gromacs/bin/gmx
Data prefix: /usr/local/gromacs
Working dir: /home/vedranm/0001.5
Command line:
gmx mdrun -v
Back Off! I just backed up md.log to ./#md.log.90#
Running on 1 node with total 4 cores, 8 logical cores, 1 compatible GPU
Hardware detected:
CPU info:
Vendor: Intel
Brand: Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz
SIMD instructions most likely to fit this hardware: AVX_256
SIMD instructions selected at GROMACS compile time: AVX_256
Hardware topology: Basic
GPU info:
Number of GPUs detected: 1
#0: name: AMD HAWAII (DRM 2.48.0 / 4.9.6-200.fc25.x86_64, LLVM 5.0.0), vendor: AMD, device version: OpenCL 1.1 Mesa 17.1.0-devel (git-af303ab), stat: compatible
Reading file topol.tpr, VERSION 2016.2-dev-20161115-0f1ce5d (single precision)
Changing nstlist from 10 to 40, rlist from 1 to 1.101
Using 1 MPI thread
Using 8 OpenMP threads
1 compatible GPU is present, with ID 0
1 GPU auto-selected for this run.
Mapping of GPU ID to the 1 PP rank in this node: 0
; ---------------------------------------------------------------------------
; Kernel `test` (compiler-generated AMDGPU listing).
; Every lane stores one zero dword at  base + 4 * zext(v0), where `base` is the
; 64-bit value read from offset 0 of the pointer held in s[4:5].
; ---------------------------------------------------------------------------
.text
.section .AMDGPU.config
; Five (key, value) dword pairs.
; Keys 47176 / 47180 / 47200 are 0xB848 / 0xB84C / 0xB860 -- presumably the
; COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and COMPUTE_TMPRING_SIZE register
; offsets (TODO confirm against the hardware register reference).
; 11272192 = 0xAC0000 = (192 << 12) | (1 << 21) | (1 << 23), which decodes
; consistently with float_mode = 192, enable_dx10_clamp = 1 and
; enable_ieee_mode = 1 in the descriptor below.
.long 47176
.long 11272192
; 140 = 0x8C = (6 << 1) | (1 << 7), consistent with USER_SGPR: 6 and
; TGID_X_EN: 1 in the kernel-info comments at the end of this kernel.
.long 47180
.long 140
.long 47200
.long 0
; Keys 4 and 8 both carry the value 0; their meaning is not visible in this
; listing (NOTE(review): possibly spill counters -- confirm).
.long 4
.long 0
.long 8
.long 0
.text
.globl test
; 2^8 = 256-byte alignment for the kernel symbol.
.p2align 8
.type test,@function
.amdgpu_hsa_kernel test
test: ; @test
; Kernel descriptor. The field values below are what the loader/dispatcher
; reads; the instructions start after .end_amd_kernel_code_t.
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
; Target machine version 7.0.1.
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
; Instructions begin 256 bytes past the start of this descriptor.
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
; Register granules; these agree with VGPRBlocks: 0 / SGPRBlocks: 0 below.
granulated_workitem_vgpr_count = 0
granulated_wavefront_sgpr_count = 0
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
; 6 user SGPRs: consistent with the two enabled inputs below, assuming the
; private segment buffer takes 4 SGPRs and the kernarg pointer takes 2
; (s[0:3] and s[4:5] respectively -- TODO confirm against the ABI document).
user_sgpr_count = 6
; The work-group ID X SGPR is enabled, but no instruction below reads it.
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
; 0 here presumably means only the X work-item ID is preloaded (in v0).
enable_vgpr_workitem_id = 0
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 0
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
; No scratch, LDS or GDS storage is requested.
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
; 24 bytes of kernel arguments; the code below only reads the first 8.
kernarg_segment_byte_size = 24
workgroup_fbarrier_count = 0
; Same values as NumSgprs: 6 / NumVgprs: 4 in the kernel-info comments.
wavefront_sgpr_count = 6
workitem_vgpr_count = 4
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
; NOTE(review): presumably a log2 encoding (2^6 = 64 lanes) -- confirm.
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
; s[0:1] = 64-bit base address read from offset 0 of the pointer in s[4:5].
s_load_dwordx2 s[0:1], s[4:5], 0x0
; v1 = 0: used both as the high half of the 64-bit index v[0:1] and as the
; dword that gets stored.
v_mov_b32_e32 v1, 0
; Fill in the upper half of the buffer resource s[0:3]: s2 = 0, s3 = 0xf000.
s_mov_b32 s3, 0xf000
s_mov_b32 s2, 0
; v[2:3] = v[0:1] << 2, i.e. the byte offset of a 4-byte element.
v_lshl_b64 v[2:3], v[0:1], 2
; Wait for the scalar load of s[0:1] before the resource is used.
s_waitcnt lgkmcnt(0)
; Store v1 (zero) through resource s[0:3] with v[2:3] as the 64-bit offset.
buffer_store_dword v1, v[2:3], s[0:3], 0 addr64
s_endpgm
.Lfunc_end0:
.size test, .Lfunc_end0-test
.section .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 44
; NumSgprs: 6
; NumVgprs: 4
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 0
; VGPRBlocks: 0
; NumSGPRsForWavesPerEU: 6
; NumVGPRsForWavesPerEU: 4
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 6
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
.ident "clang version 5.0.0 (https://github.com/llvm-mirror/clang.git 0b6e3eba6801a3995dd16fc50952fbff3537537e) (https://github.com/llvm-mirror/llvm.git 2733db6fd1aa77bb055dc7d8359b067fa86fe229)"
.section ".note.GNU-stack"
; ---------------------------------------------------------------------------
; Kernel `memset_f3` (compiler-generated AMDGPU listing).
; Per lane:
;   idx = v0 + (dword 1 at s[4:5] & 0xffff) * s8 + (dword 5 at s[6:7])
;   if idx < (dword 3 at s[6:7]), unsigned:
;       store 16 bytes at  base + 16 * idx, where base = dwords 0-1 at s[6:7]
;       and the first three dwords are the fill value (dword 2 at s[6:7]).
; The "dword N" offsets assume the scalar-load immediate is counted in dwords
; on this target (TODO confirm against the ISA manual).
; NOTE(review): `.Lfunc_end0` and the BB0_* numbering restart here, and
; `.Lfunc_end0` is also defined by the `test` kernel earlier in this listing.
; Presumably this is the start of a second, separately compiled module; a
; single assembly unit could not define that label twice.
; ---------------------------------------------------------------------------
.text
.section .AMDGPU.config
; Five (key, value) dword pairs.
; Keys 47176 / 47180 / 47200 are 0xB848 / 0xB84C / 0xB860 -- presumably the
; COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and COMPUTE_TMPRING_SIZE register
; offsets (TODO confirm).
; 11272257 = 0xAC0041 = 0xAC0000 | (1 << 6) | 1, which decodes consistently
; with float_mode = 192, enable_dx10_clamp = 1, enable_ieee_mode = 1 and the
; two granulated register counts of 1 in the descriptor below.
.long 47176
.long 11272257
; 144 = 0x90 = (8 << 1) | (1 << 7), consistent with USER_SGPR: 8 and
; TGID_X_EN: 1 in the kernel-info comments at the end of this kernel.
.long 47180
.long 144
.long 47200
.long 0
; Keys 4 and 8 both carry the value 0; their meaning is not visible in this
; listing (NOTE(review): possibly spill counters -- confirm).
.long 4
.long 0
.long 8
.long 0
.text
.globl memset_f3
; 2^8 = 256-byte alignment for the kernel symbol.
.p2align 8
.type memset_f3,@function
.amdgpu_hsa_kernel memset_f3
memset_f3: ; @memset_f3
; Kernel descriptor; the instructions start after .end_amd_kernel_code_t.
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
; Target machine version 7.0.1.
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
; Instructions begin 256 bytes past the start of this descriptor.
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
; Register granules; these agree with VGPRBlocks: 1 / SGPRBlocks: 1 below.
granulated_workitem_vgpr_count = 1
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
; 8 user SGPRs: consistent with the three enabled inputs below, assuming
; private segment buffer = s[0:3], dispatch pointer = s[4:5], kernarg
; pointer = s[6:7] (TODO confirm the ordering against the ABI document).
user_sgpr_count = 8
; Work-group ID X is enabled; the code reads it from s8, i.e. presumably the
; first SGPR after the 8 user SGPRs.
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
; 0 here presumably means only the X work-item ID is preloaded (in v0).
enable_vgpr_workitem_id = 0
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
; No scratch, LDS or GDS storage is requested.
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
; 32 bytes (8 dwords) of kernel arguments; the code reads dwords 0-3 and 5.
kernarg_segment_byte_size = 32
workgroup_fbarrier_count = 0
; Same values as NumSgprs: 11 / NumVgprs: 5 in the kernel-info comments.
wavefront_sgpr_count = 11
workitem_vgpr_count = 5
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
; NOTE(review): presumably a log2 encoding (2^6 = 64 lanes) -- confirm.
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
; s0 = dword 1 behind s[4:5]; only its low 16 bits are used below
; (presumably the work-group size in X from the dispatch packet -- confirm).
s_load_dword s0, s[4:5], 0x1
; s1 = kernarg dword 5, added to the index below
; (presumably an implicit global offset -- confirm).
s_load_dword s1, s[6:7], 0x5
; s2 = 0: third dword of the buffer resource s[0:3] used by the store.
s_mov_b32 s2, 0
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
; s0 = (low 16 bits) * s8.
s_mul_i32 s0, s0, s8
v_mov_b32_e32 v1, s0
; s0 is reused: it now receives kernarg dword 3, the exclusive upper bound.
s_load_dword s0, s[6:7], 0x3
; v0 = v0 + s0_product + s1  -> per-lane element index.
v_add_i32_e32 v0, vcc, v0, v1
v_add_i32_e32 v0, vcc, s1, v0
s_waitcnt lgkmcnt(0)
; vcc = (bound > index), unsigned.
v_cmp_gt_u32_e32 vcc, s0, v0
; s[0:1] = previous exec; exec = exec & vcc (only in-range lanes stay on).
s_and_saveexec_b64 s[0:1], vcc
; s[4:5] = lanes that were active before the test but are now masked off.
s_xor_b64 s[4:5], exec, s[0:1]
; mask branch BB0_2
BB0_1:
; s[0:1] = kernarg dwords 0-1 (destination base), s6 = kernarg dword 2
; (fill value). s[6:7] is overwritten here; it is not read again afterwards.
s_load_dwordx2 s[0:1], s[6:7], 0x0
s_load_dword s6, s[6:7], 0x2
; v[3:4] = zext(index) << 4, i.e. a 16-byte element stride.
v_mov_b32_e32 v1, 0
v_lshl_b64 v[3:4], v[0:1], 4
; s3 = 0xf000 completes the buffer resource s[0:3] (s2 = 0 was set above).
s_mov_b32 s3, 0xf000
s_waitcnt lgkmcnt(0)
; v0 = v1 = v2 = fill value. v3 is NOT overwritten: the fourth stored dword
; is the low half of the byte offset (presumably the padding slot of a
; 16-byte three-component element, so its contents do not matter -- confirm).
v_mov_b32_e32 v0, s6
v_mov_b32_e32 v1, v0
v_mov_b32_e32 v2, v0
buffer_store_dwordx4 v[0:3], v[3:4], s[0:3], 0 addr64
s_waitcnt vmcnt(0) expcnt(0)
BB0_2:
; Re-enable the lanes that were masked off by the bounds test.
s_or_b64 exec, exec, s[4:5]
s_endpgm
.Lfunc_end0:
.size memset_f3, .Lfunc_end0-memset_f3
.section .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 124
; NumSgprs: 11
; NumVgprs: 5
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 1
; NumSGPRsForWavesPerEU: 11
; NumVGPRsForWavesPerEU: 5
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; ---------------------------------------------------------------------------
; Kernel `memset_f2` (compiler-generated AMDGPU listing).
; Per lane:
;   idx = v0 + (dword 1 at s[4:5] & 0xffff) * s8 + (dword 5 at s[6:7])
;   if idx < (dword 3 at s[6:7]), unsigned:
;       store two copies of the fill value (dword 2 at s[6:7]) at
;       base + 8 * idx, where base = dwords 0-1 at s[6:7].
; The "dword N" offsets assume the scalar-load immediate is counted in dwords
; on this target (TODO confirm against the ISA manual).
; ---------------------------------------------------------------------------
.section .AMDGPU.config
; Five (key, value) dword pairs.
; Keys 47176 / 47180 / 47200 are 0xB848 / 0xB84C / 0xB860 -- presumably the
; COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and COMPUTE_TMPRING_SIZE register
; offsets (TODO confirm).
; 11272256 = 0xAC0040 = 0xAC0000 | (1 << 6), which decodes consistently with
; float_mode = 192, enable_dx10_clamp = 1, enable_ieee_mode = 1,
; granulated_wavefront_sgpr_count = 1 and granulated_workitem_vgpr_count = 0.
.long 47176
.long 11272256
; 144 = 0x90 = (8 << 1) | (1 << 7), consistent with USER_SGPR: 8 and
; TGID_X_EN: 1 in the kernel-info comments at the end of this kernel.
.long 47180
.long 144
.long 47200
.long 0
; Keys 4 and 8 both carry the value 0; their meaning is not visible in this
; listing (NOTE(review): possibly spill counters -- confirm).
.long 4
.long 0
.long 8
.long 0
.text
.globl memset_f2
; 2^8 = 256-byte alignment for the kernel symbol.
.p2align 8
.type memset_f2,@function
.amdgpu_hsa_kernel memset_f2
memset_f2: ; @memset_f2
; Kernel descriptor; the instructions start after .end_amd_kernel_code_t.
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
; Target machine version 7.0.1.
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
; Instructions begin 256 bytes past the start of this descriptor.
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
; Register granules; these agree with VGPRBlocks: 0 / SGPRBlocks: 1 below.
granulated_workitem_vgpr_count = 0
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
; 8 user SGPRs: consistent with the three enabled inputs below, assuming
; private segment buffer = s[0:3], dispatch pointer = s[4:5], kernarg
; pointer = s[6:7] (TODO confirm the ordering against the ABI document).
user_sgpr_count = 8
; Work-group ID X is enabled; the code reads it from s8, i.e. presumably the
; first SGPR after the 8 user SGPRs.
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
; 0 here presumably means only the X work-item ID is preloaded (in v0).
enable_vgpr_workitem_id = 0
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
; No scratch, LDS or GDS storage is requested.
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
; 32 bytes (8 dwords) of kernel arguments; the code reads dwords 0-3 and 5.
kernarg_segment_byte_size = 32
workgroup_fbarrier_count = 0
; Same values as NumSgprs: 11 / NumVgprs: 4 in the kernel-info comments.
wavefront_sgpr_count = 11
workitem_vgpr_count = 4
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
; NOTE(review): presumably a log2 encoding (2^6 = 64 lanes) -- confirm.
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
; s0 = dword 1 behind s[4:5]; only its low 16 bits are used below
; (presumably the work-group size in X from the dispatch packet -- confirm).
s_load_dword s0, s[4:5], 0x1
; s1 = kernarg dword 5, added to the index below
; (presumably an implicit global offset -- confirm).
s_load_dword s1, s[6:7], 0x5
; s2 = 0: third dword of the buffer resource s[0:3] used by the store.
s_mov_b32 s2, 0
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
; s0 = (low 16 bits) * s8.
s_mul_i32 s0, s0, s8
v_mov_b32_e32 v1, s0
; s0 is reused: it now receives kernarg dword 3, the exclusive upper bound.
s_load_dword s0, s[6:7], 0x3
; v0 = v0 + s0_product + s1  -> per-lane element index.
v_add_i32_e32 v0, vcc, v0, v1
v_add_i32_e32 v0, vcc, s1, v0
s_waitcnt lgkmcnt(0)
; vcc = (bound > index), unsigned.
v_cmp_gt_u32_e32 vcc, s0, v0
; s[0:1] = previous exec; exec = exec & vcc (only in-range lanes stay on).
s_and_saveexec_b64 s[0:1], vcc
; s[4:5] = lanes that were active before the test but are now masked off.
s_xor_b64 s[4:5], exec, s[0:1]
; mask branch BB1_2
BB1_1:
; s[0:1] = kernarg dwords 0-1 (destination base), s6 = kernarg dword 2
; (fill value). s[6:7] is overwritten here; it is not read again afterwards.
s_load_dwordx2 s[0:1], s[6:7], 0x0
s_load_dword s6, s[6:7], 0x2
v_mov_b32_e32 v1, 0
; s3 = 0xf000 completes the buffer resource s[0:3] (s2 = 0 was set above).
s_mov_b32 s3, 0xf000
; v[0:1] = zext(index) << 3, i.e. an 8-byte element stride.
v_lshl_b64 v[0:1], v[0:1], 3
s_waitcnt lgkmcnt(0)
; v2 = v3 = fill value; both dwords of the element get the same value.
v_mov_b32_e32 v2, s6
v_mov_b32_e32 v3, v2
buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
s_waitcnt vmcnt(0) expcnt(0)
BB1_2:
; Re-enable the lanes that were masked off by the bounds test.
s_or_b64 exec, exec, s[4:5]
s_endpgm
.Lfunc_end1:
.size memset_f2, .Lfunc_end1-memset_f2
.section .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 120
; NumSgprs: 11
; NumVgprs: 4
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 0
; NumSGPRsForWavesPerEU: 11
; NumVGPRsForWavesPerEU: 4
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; ---------------------------------------------------------------------------
; Kernel `memset_f` (compiler-generated AMDGPU listing).
; Per lane:
;   idx = v0 + (dword 1 at s[4:5] & 0xffff) * s8 + (dword 5 at s[6:7])
;   if idx < (dword 3 at s[6:7]), unsigned:
;       store the fill value (dword 2 at s[6:7]) at base + 4 * idx,
;       where base = dwords 0-1 at s[6:7].
; The "dword N" offsets assume the scalar-load immediate is counted in dwords
; on this target (TODO confirm against the ISA manual).
; ---------------------------------------------------------------------------
.section .AMDGPU.config
; Five (key, value) dword pairs.
; Keys 47176 / 47180 / 47200 are 0xB848 / 0xB84C / 0xB860 -- presumably the
; COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and COMPUTE_TMPRING_SIZE register
; offsets (TODO confirm).
; 11272256 = 0xAC0040 = 0xAC0000 | (1 << 6), which decodes consistently with
; float_mode = 192, enable_dx10_clamp = 1, enable_ieee_mode = 1,
; granulated_wavefront_sgpr_count = 1 and granulated_workitem_vgpr_count = 0.
.long 47176
.long 11272256
; 144 = 0x90 = (8 << 1) | (1 << 7), consistent with USER_SGPR: 8 and
; TGID_X_EN: 1 in the kernel-info comments at the end of this kernel.
.long 47180
.long 144
.long 47200
.long 0
; Keys 4 and 8 both carry the value 0; their meaning is not visible in this
; listing (NOTE(review): possibly spill counters -- confirm).
.long 4
.long 0
.long 8
.long 0
.text
.globl memset_f
; 2^8 = 256-byte alignment for the kernel symbol.
.p2align 8
.type memset_f,@function
.amdgpu_hsa_kernel memset_f
memset_f: ; @memset_f
; Kernel descriptor; the instructions start after .end_amd_kernel_code_t.
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
; Target machine version 7.0.1.
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
; Instructions begin 256 bytes past the start of this descriptor.
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
; Register granules; these agree with VGPRBlocks: 0 / SGPRBlocks: 1 below.
granulated_workitem_vgpr_count = 0
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
; 8 user SGPRs: consistent with the three enabled inputs below, assuming
; private segment buffer = s[0:3], dispatch pointer = s[4:5], kernarg
; pointer = s[6:7] (TODO confirm the ordering against the ABI document).
user_sgpr_count = 8
; Work-group ID X is enabled; the code reads it from s8, i.e. presumably the
; first SGPR after the 8 user SGPRs.
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
; 0 here presumably means only the X work-item ID is preloaded (in v0).
enable_vgpr_workitem_id = 0
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
; No scratch, LDS or GDS storage is requested.
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
; 32 bytes (8 dwords) of kernel arguments; the code reads dwords 0-3 and 5.
kernarg_segment_byte_size = 32
workgroup_fbarrier_count = 0
; Same values as NumSgprs: 11 / NumVgprs: 3 in the kernel-info comments.
wavefront_sgpr_count = 11
workitem_vgpr_count = 3
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
; NOTE(review): presumably a log2 encoding (2^6 = 64 lanes) -- confirm.
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
; s0 = dword 1 behind s[4:5]; only its low 16 bits are used below
; (presumably the work-group size in X from the dispatch packet -- confirm).
s_load_dword s0, s[4:5], 0x1
; s1 = kernarg dword 5, added to the index below
; (presumably an implicit global offset -- confirm).
s_load_dword s1, s[6:7], 0x5
; s2 = 0: third dword of the buffer resource s[0:3] used by the store.
s_mov_b32 s2, 0
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
; s0 = (low 16 bits) * s8.
s_mul_i32 s0, s0, s8
v_mov_b32_e32 v1, s0
; s0 is reused: it now receives kernarg dword 3, the exclusive upper bound.
s_load_dword s0, s[6:7], 0x3
; v0 = v0 + s0_product + s1  -> per-lane element index.
v_add_i32_e32 v0, vcc, v0, v1
v_add_i32_e32 v0, vcc, s1, v0
s_waitcnt lgkmcnt(0)
; vcc = (bound > index), unsigned.
v_cmp_gt_u32_e32 vcc, s0, v0
; s[0:1] = previous exec; exec = exec & vcc (only in-range lanes stay on).
s_and_saveexec_b64 s[0:1], vcc
; s[4:5] = lanes that were active before the test but are now masked off.
s_xor_b64 s[4:5], exec, s[0:1]
; mask branch BB2_2
BB2_1:
; s[0:1] = kernarg dwords 0-1 (destination base), s6 = kernarg dword 2
; (fill value). s[6:7] is overwritten here; it is not read again afterwards.
s_load_dwordx2 s[0:1], s[6:7], 0x0
s_load_dword s6, s[6:7], 0x2
v_mov_b32_e32 v1, 0
; s3 = 0xf000 completes the buffer resource s[0:3] (s2 = 0 was set above).
s_mov_b32 s3, 0xf000
; v[0:1] = zext(index) << 2, i.e. a 4-byte element stride.
v_lshl_b64 v[0:1], v[0:1], 2
s_waitcnt lgkmcnt(0)
; v2 = fill value, stored as a single dword.
v_mov_b32_e32 v2, s6
buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
s_waitcnt vmcnt(0) expcnt(0)
BB2_2:
; Re-enable the lanes that were masked off by the bounds test.
s_or_b64 exec, exec, s[4:5]
s_endpgm
.Lfunc_end2:
.size memset_f, .Lfunc_end2-memset_f
.section .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 116
; NumSgprs: 11
; NumVgprs: 3
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 0
; NumSGPRsForWavesPerEU: 11
; NumVGPRsForWavesPerEU: 3
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; ---------------------------------------------------------------------------
; Kernel `zero_e_fshift` (compiler-generated AMDGPU listing).
; Per lane:
;   idx = v0 + (dword 1 at s[4:5] & 0xffff) * s8 + (dword 8 at s[6:7])
;   if idx < (dword 6 at s[6:7]), unsigned:
;       store a zero dword at  A + 4 * idx      (A = dwords 0-1 at s[6:7])
;   if idx == 0:
;       store a zero dword at  B and at  C      (B = dwords 2-3, C = dwords 4-5)
; The "dword N" offsets assume the scalar-load immediate is counted in dwords
; on this target (TODO confirm against the ISA manual).
; ---------------------------------------------------------------------------
.section .AMDGPU.config
; Five (key, value) dword pairs.
; Keys 47176 / 47180 / 47200 are 0xB848 / 0xB84C / 0xB860 -- presumably the
; COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and COMPUTE_TMPRING_SIZE register
; offsets (TODO confirm).
; 11272256 = 0xAC0040 = 0xAC0000 | (1 << 6), which decodes consistently with
; float_mode = 192, enable_dx10_clamp = 1, enable_ieee_mode = 1,
; granulated_wavefront_sgpr_count = 1 and granulated_workitem_vgpr_count = 0.
.long 47176
.long 11272256
; 144 = 0x90 = (8 << 1) | (1 << 7), consistent with USER_SGPR: 8 and
; TGID_X_EN: 1 in the kernel-info comments at the end of this kernel.
.long 47180
.long 144
.long 47200
.long 0
; Keys 4 and 8 both carry the value 0; their meaning is not visible in this
; listing (NOTE(review): possibly spill counters -- confirm).
.long 4
.long 0
.long 8
.long 0
.text
.globl zero_e_fshift
; 2^8 = 256-byte alignment for the kernel symbol.
.p2align 8
.type zero_e_fshift,@function
.amdgpu_hsa_kernel zero_e_fshift
zero_e_fshift: ; @zero_e_fshift
; Kernel descriptor; the instructions start after .end_amd_kernel_code_t.
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
; Target machine version 7.0.1.
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
; Instructions begin 256 bytes past the start of this descriptor.
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
; Register granules; these agree with VGPRBlocks: 0 / SGPRBlocks: 1 below.
granulated_workitem_vgpr_count = 0
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
; 8 user SGPRs: consistent with the three enabled inputs below, assuming
; private segment buffer = s[0:3], dispatch pointer = s[4:5], kernarg
; pointer = s[6:7] (TODO confirm the ordering against the ABI document).
user_sgpr_count = 8
; Work-group ID X is enabled; the code reads it from s8, i.e. presumably the
; first SGPR after the 8 user SGPRs.
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
; 0 here presumably means only the X work-item ID is preloaded (in v0).
enable_vgpr_workitem_id = 0
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
; No scratch, LDS or GDS storage is requested.
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
; 44 bytes (11 dwords) of kernel arguments; the code reads dwords 0-6 and 8.
kernarg_segment_byte_size = 44
workgroup_fbarrier_count = 0
; Same values as NumSgprs: 14 / NumVgprs: 4 in the kernel-info comments.
wavefront_sgpr_count = 14
workitem_vgpr_count = 4
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
; NOTE(review): presumably a log2 encoding (2^6 = 64 lanes) -- confirm.
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
; s0 = dword 1 behind s[4:5]; only its low 16 bits are used below
; (presumably the work-group size in X from the dispatch packet -- confirm).
s_load_dword s0, s[4:5], 0x1
; s1 = kernarg dword 8, added to the index below
; (presumably an implicit global offset -- confirm).
s_load_dword s1, s[6:7], 0x8
; s2 = 0: third dword of the buffer resource s[0:3] used by the first store.
s_mov_b32 s2, 0
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
; s0 = (low 16 bits) * s8.
s_mul_i32 s0, s0, s8
v_mov_b32_e32 v1, s0
; s0 is reused: it now receives kernarg dword 6, the exclusive upper bound.
s_load_dword s0, s[6:7], 0x6
; v0 = v0 + s0_product + s1  -> per-lane element index.
v_add_i32_e32 v0, vcc, v0, v1
v_add_i32_e32 v0, vcc, s1, v0
s_waitcnt lgkmcnt(0)
; vcc = (bound > index), unsigned.
v_cmp_gt_u32_e32 vcc, s0, v0
; s[0:1] = previous exec; exec = exec & vcc (only in-range lanes stay on).
s_and_saveexec_b64 s[0:1], vcc
; s[4:5] = lanes that were active before the test but are now masked off.
s_xor_b64 s[4:5], exec, s[0:1]
; mask branch BB3_2
BB3_1:
; First part: zero one dword per in-range lane.
; s[0:1] = kernarg dwords 0-1 (base A).
s_load_dwordx2 s[0:1], s[6:7], 0x0
; v1 = 0: high half of the 64-bit index v[0:1] and the value that is stored.
v_mov_b32_e32 v1, 0
; s3 = 0xf000 completes the buffer resource s[0:3] (s2 = 0 was set above).
s_mov_b32 s3, 0xf000
; v[2:3] = zext(index) << 2, i.e. a 4-byte element stride. v0 itself is left
; untouched because it is compared against 0 after the exec mask is restored.
v_lshl_b64 v[2:3], v[0:1], 2
s_waitcnt lgkmcnt(0)
buffer_store_dword v1, v[2:3], s[0:3], 0 addr64
s_waitcnt vmcnt(0) expcnt(0)
BB3_2:
; Re-enable the lanes that were masked off by the bounds test.
s_or_b64 exec, exec, s[4:5]
; Second part: only the lane whose index is 0 continues.
v_cmp_eq_u32_e32 vcc, 0, v0
s_and_saveexec_b64 s[0:1], vcc
; s[0:1] = lanes to re-enable at BB3_4.
s_xor_b64 s[0:1], exec, s[0:1]
; mask branch BB3_4
BB3_3:
; s[8:9] = kernarg dwords 2-3 (address B), s[4:5] = kernarg dwords 4-5
; (address C). s[4:5] no longer holds the saved mask from the first part; it
; was already consumed at BB3_2.
s_load_dwordx2 s[8:9], s[6:7], 0x2
s_load_dwordx2 s[4:5], s[6:7], 0x4
; Upper resource halves: s[10:11] = (-1, 0xf000), copied into s[6:7] so that
; s[8:11] and s[4:7] are two complete buffer resources. s[6:7] (the kernarg
; pointer) is overwritten only after both loads above have been issued.
s_mov_b32 s11, 0xf000
s_mov_b32 s10, -1
v_mov_b32_e32 v0, 0
s_mov_b32 s6, s10
s_mov_b32 s7, s11
s_waitcnt lgkmcnt(0)
; `off`: no VGPR address, so each store writes v0 (zero) at the resource base.
buffer_store_dword v0, off, s[8:11], 0
buffer_store_dword v0, off, s[4:7], 0
s_waitcnt vmcnt(0) expcnt(0)
BB3_4:
; Re-enable the lanes that were masked off by the index == 0 test.
s_or_b64 exec, exec, s[0:1]
s_endpgm
.Lfunc_end3:
.size zero_e_fshift, .Lfunc_end3-zero_e_fshift
.section .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 180
; NumSgprs: 14
; NumVgprs: 4
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 0
; NumSGPRsForWavesPerEU: 14
; NumVGPRsForWavesPerEU: 4
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
.section .AMDGPU.config
.long 47176
.long 11272532
.long 47180
.long 2192
.long 47200
.long 0
.long 4
.long 0
.long 8
.long 0
.text
.globl nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl
.p2align 8
.type nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl,@function
.amdgpu_hsa_kernel nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl
nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl: ; @nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
granulated_workitem_vgpr_count = 20
granulated_wavefront_sgpr_count = 5
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
user_sgpr_count = 8
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
enable_vgpr_workitem_id = 1
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 232
workgroup_fbarrier_count = 0
wavefront_sgpr_count = 42
workitem_vgpr_count = 83
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
s_load_dwordx2 s[0:1], s[6:7], 0x2c
s_mov_b32 s9, 0
s_lshl_b64 s[10:11], s[8:9], 4
v_mov_b32_e32 v3, s10
s_mov_b32 s2, s9
s_mov_b32 s3, 0xf000
v_mov_b32_e32 v4, s11
s_waitcnt lgkmcnt(0)
buffer_load_dwordx4 v[31:34], v[3:4], s[0:3], 0 addr64
v_mov_b32_e32 v2, v0
s_load_dwordx2 s[8:9], s[6:7], 0x24
s_load_dwordx2 s[16:17], s[6:7], 0x18
s_mov_b64 s[18:19], s[2:3]
s_mov_b64 s[10:11], s[2:3]
s_load_dword s14, s[6:7], 0x33
s_load_dword s0, s[6:7], 0x2
s_load_dwordx2 s[20:21], s[6:7], 0x22
s_mov_b32 m0, -1
s_mov_b64 s[22:23], s[2:3]
s_load_dword s1, s[4:5], 0x1
s_waitcnt lgkmcnt(0)
s_add_i32 s15, s14, 0x420
s_waitcnt vmcnt(0)
v_lshlrev_b32_e32 v40, 3, v31
v_mul_lo_i32 v4, v32, 3
v_add_i32_e32 v0, vcc, v1, v40
v_lshlrev_b32_e32 v0, 3, v0
v_add_i32_e32 v9, vcc, v2, v0
v_ashrrev_i32_e32 v10, 31, v9
v_ashrrev_i32_e32 v5, 31, v4
v_lshl_b64 v[11:12], v[4:5], 2
v_lshl_b64 v[6:7], v[9:10], 4
buffer_load_dwordx4 v[5:8], v[6:7], s[16:19], 0 addr64
buffer_load_dwordx2 v[13:14], v[11:12], s[8:11], 0 addr64
buffer_load_dword v0, v[11:12], s[8:11], 0 addr64 offset:8
s_waitcnt vmcnt(1)
v_add_f32_e32 v11, v5, v13
s_waitcnt vmcnt(0)
v_add_f32_e32 v5, v7, v0
v_lshlrev_b32_e32 v0, 3, v1
v_add_i32_e32 v39, vcc, v2, v0
v_lshlrev_b32_e32 v3, 4, v39
v_add_f32_e32 v12, v6, v14
v_mul_f32_e32 v6, s0, v8
v_add_i32_e32 v3, vcc, s14, v3
ds_write2_b64 v3, v[11:12], v[5:6] offset1:1
s_waitcnt lgkmcnt(0)
v_lshl_b64 v[5:6], v[9:10], 3
buffer_load_dwordx2 v[5:6], v[5:6], s[20:23], 0 addr64
s_and_b32 s0, s1, 0xffff
v_mad_u32_u24 v52, s0, v1, v2
v_lshlrev_b32_e32 v7, 3, v39
v_add_i32_e32 v7, vcc, s15, v7
v_or_b32_e32 v3, 32, v52
v_lshrrev_b32_e32 v41, 5, v52
v_cmp_eq_u32_e32 vcc, 32, v3
s_waitcnt vmcnt(0)
ds_write_b64 v7, v[5:6]
s_and_saveexec_b64 s[0:1], vcc
s_xor_b64 s[0:1], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; mask branch BB4_2
BB4_1:
v_lshlrev_b32_e32 v3, 2, v41
v_add_i32_e32 v3, vcc, s14, v3
v_mov_b32_e32 v5, 0
s_mov_b32 m0, -1
ds_write_b32 v3, v5 offset:2336
s_waitcnt lgkmcnt(0)
BB4_2: ; %.preheader447587
s_or_b64 exec, exec, s[0:1]
s_barrier
s_load_dwordx2 s[12:13], s[6:7], 0x1a
v_cmp_lt_i32_e32 vcc, v33, v34
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v12, -1
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB4_4
; BB#3: ; %.preheader447587.._crit_edge_crit_edge
v_mov_b32_e32 v43, 0
v_lshlrev_b32_e32 v3, 2, v52
v_mov_b32_e32 v44, v43
v_mov_b32_e32 v45, v43
v_add_i32_e32 v3, vcc, s14, v3
v_mov_b32_e32 v8, v43
v_add_i32_e32 v5, vcc, 0x620, v3
v_add_i32_e32 v6, vcc, 0x720, v3
v_add_i32_e32 v7, vcc, 0x820, v3
v_mov_b32_e32 v12, 0
v_mov_b32_e32 v9, v44
v_mov_b32_e32 v10, v45
v_mov_b32_e32 v11, v46
s_branch BB4_5
BB4_4:
; implicit-def: %VGPR43_VGPR44_VGPR45_VGPR46
; implicit-def: %VGPR5
; implicit-def: %VGPR6
; implicit-def: %VGPR7
; implicit-def: %VGPR8_VGPR9_VGPR10_VGPR11
BB4_5: ; %Flow1190
s_load_dwordx2 s[8:9], s[6:7], 0x20
v_cmp_ne_u32_e32 vcc, 0, v12
v_cndmask_b32_e64 v11, 0, 1, vcc
v_cmp_ne_u32_e32 vcc, 1, v11
v_mov_b32_e32 v35, v43
v_mov_b32_e32 v27, v43
v_mov_b32_e32 v23, v43
v_mov_b32_e32 v19, v43
v_mov_b32_e32 v15, v43
v_mov_b32_e32 v11, v43
s_movk_i32 s18, 0x620
v_mov_b32_e32 v3, 0
s_add_i32 s4, s14, s18
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v36, v44
v_mov_b32_e32 v37, v45
v_mov_b32_e32 v38, v46
v_mov_b32_e32 v28, v44
v_mov_b32_e32 v29, v45
v_mov_b32_e32 v30, v46
v_mov_b32_e32 v24, v44
v_mov_b32_e32 v25, v45
v_mov_b32_e32 v26, v46
v_mov_b32_e32 v20, v44
v_mov_b32_e32 v21, v45
v_mov_b32_e32 v22, v46
v_mov_b32_e32 v16, v44
v_mov_b32_e32 v17, v45
v_mov_b32_e32 v18, v46
v_mov_b32_e32 v12, v44
v_mov_b32_e32 v13, v45
v_mov_b32_e32 v14, v46
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB4_172
; BB#6: ; %.lr.ph
v_or_b32_e32 v5, 4, v1
v_cmp_eq_u32_e32 vcc, 4, v5
v_cmp_gt_u32_e64 s[0:1], 4, v2
s_and_b64 s[26:27], s[0:1], vcc
v_add_i32_e32 v5, vcc, v1, v2
v_and_b32_e32 v8, 4, v1
s_add_i32 s22, s14, 0x400
v_lshlrev_b32_e32 v5, 2, v5
v_lshlrev_b32_e32 v8, 2, v8
v_add_i32_e32 v50, vcc, s22, v5
v_lshlrev_b32_e32 v5, 2, v52
v_and_b32_e32 v48, 31, v52
v_add_i32_e32 v52, vcc, s22, v8
v_lshlrev_b32_e32 v8, 4, v2
s_load_dword s19, s[6:7], 0x5
v_add_i32_e32 v53, vcc, s14, v8
v_lshlrev_b32_e32 v8, 3, v2
v_add_i32_e32 v54, vcc, s15, v8
v_mov_b32_e32 v8, 0
s_load_dwordx2 s[10:11], s[6:7], 0x30
s_load_dword s5, s[6:7], 0x9
s_load_dwordx2 s[24:25], s[6:7], 0x2e
v_mov_b32_e32 v9, v8
v_mov_b32_e32 v10, v8
v_mov_b32_e32 v14, v11
v_mov_b32_e32 v13, v10
v_mov_b32_e32 v12, v9
v_mov_b32_e32 v11, v8
v_add_i32_e32 v7, vcc, s14, v5
v_mov_b32_e32 v18, v11
v_mov_b32_e32 v22, v11
v_mov_b32_e32 v26, v11
v_mov_b32_e32 v30, v11
v_mov_b32_e32 v38, v11
v_mov_b32_e32 v46, v11
s_waitcnt lgkmcnt(0)
v_mul_f32_e64 v47, s19, s19
v_mov_b32_e32 v42, 0
s_mov_b32 s30, 0
v_mov_b32_e32 v49, v42
v_cmp_gt_u32_e64 s[0:1], v1, v2
v_cmp_ne_u32_e64 s[2:3], 22, v32
v_mul_f32_e32 v51, s19, v47
v_add_i32_e32 v5, vcc, s18, v7
v_add_i32_e32 v6, vcc, 0x720, v7
v_add_i32_e32 v7, vcc, 0x820, v7
s_mov_b32 s31, 0xf000
s_mov_b64 s[28:29], 0
v_ashrrev_i32_e32 v56, 31, v33
v_mov_b32_e32 v55, v33
v_or_b32_e32 v33, 7, v40
v_or_b32_e32 v57, 6, v40
v_or_b32_e32 v58, 5, v40
v_or_b32_e32 v59, 4, v40
v_or_b32_e32 v60, 3, v40
v_or_b32_e32 v61, 2, v40
v_or_b32_e32 v62, 1, v40
v_mov_b32_e32 v17, v10
v_mov_b32_e32 v16, v9
v_mov_b32_e32 v15, v8
v_mov_b32_e32 v21, v10
v_mov_b32_e32 v20, v9
v_mov_b32_e32 v19, v8
v_mov_b32_e32 v25, v10
v_mov_b32_e32 v24, v9
v_mov_b32_e32 v23, v8
v_mov_b32_e32 v29, v10
v_mov_b32_e32 v28, v9
v_mov_b32_e32 v27, v8
v_mov_b32_e32 v37, v10
v_mov_b32_e32 v36, v9
v_mov_b32_e32 v35, v8
v_mov_b32_e32 v45, v10
v_mov_b32_e32 v44, v9
v_mov_b32_e32 v43, v8
BB4_7: ; =>This Loop Header: Depth=1
; Child Loop BB4_47 Depth 2
; Child Loop BB4_87 Depth 2
; Child Loop BB4_127 Depth 2
; Child Loop BB4_167 Depth 2
v_lshl_b64 v[63:64], v[55:56], 5
v_add_i32_e32 v65, vcc, s24, v63
v_mov_b32_e32 v14, s25
v_addc_u32_e32 v64, vcc, v64, v14, vcc
v_lshl_b64 v[66:67], v[41:42], 3
v_add_i32_e32 v65, vcc, v65, v66
v_addc_u32_e32 v66, vcc, v64, v67, vcc
buffer_load_dwordx2 v[63:64], v[65:66], s[28:31], 0 addr64 offset:16
s_waitcnt vmcnt(0)
v_cmp_ne_u32_e32 vcc, 0, v63
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[32:33], exec, s[14:15]
; mask branch BB4_171
s_cbranch_execz BB4_171
BB4_8: ; in Loop: Header=BB4_7 Depth=1
v_ashrrev_i32_e32 v66, 31, v64
v_mov_b32_e32 v65, v64
v_lshl_b64 v[64:65], v[65:66], 7
v_add_i32_e32 v66, vcc, s10, v64
v_mov_b32_e32 v14, s11
v_addc_u32_e32 v65, vcc, v65, v14, vcc
v_lshl_b64 v[67:68], v[48:49], 2
v_add_i32_e32 v66, vcc, v66, v67
v_addc_u32_e32 v67, vcc, v65, v68, vcc
buffer_load_dword v14, v[66:67], s[28:31], 0 addr64
s_and_saveexec_b64 s[14:15], s[26:27]
s_xor_b64 s[14:15], exec, s[14:15]
s_waitcnt vmcnt(0)
; mask branch BB4_10
s_cbranch_execz BB4_10
BB4_9: ; in Loop: Header=BB4_7 Depth=1
v_lshl_b64 v[64:65], v[55:56], 5
v_add_i32_e32 v66, vcc, s24, v64
v_mov_b32_e32 v18, s25
v_addc_u32_e32 v65, vcc, v65, v18, vcc
v_lshl_b64 v[67:68], v[2:3], 2
v_add_i32_e32 v66, vcc, v66, v67
v_addc_u32_e32 v67, vcc, v65, v68, vcc
buffer_load_dword v18, v[66:67], s[28:31], 0 addr64
s_mov_b32 m0, -1
s_waitcnt vmcnt(0)
ds_write_b32 v50, v18
s_waitcnt lgkmcnt(0)
BB4_10: ; %.preheader.preheader
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_and_b32_e32 v18, 0xff, v63
v_cmp_ne_u32_e32 vcc, 0, v18
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[34:35], exec, s[14:15]
; mask branch BB4_50
s_cbranch_execz BB4_50
BB4_11: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read_b32 v18, v52
s_mov_b64 s[18:19], s[30:31]
s_mov_b64 s[22:23], s[30:31]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 4
v_lshl_b64 v[68:69], v[64:65], 3
buffer_load_dwordx4 v[73:76], v[66:67], s[16:19], 0 addr64
buffer_load_dwordx2 v[65:66], v[68:69], s[20:23], 0 addr64
v_mov_b32_e32 v67, 0
v_and_b32_e32 v22, 1, v63
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v72, v67
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
s_waitcnt vmcnt(0)
; mask branch BB4_15
s_cbranch_execz BB4_15
BB4_12: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset1:1
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v22, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v26, v73, v77
v_mul_f32_e32 v38, v22, v22
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v26, v26
v_mov_b32_e32 v67, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v72, v67
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_14
s_cbranch_execz BB4_14
BB4_13: ; in Loop: Header=BB4_7 Depth=1
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v70, v38
v_and_b32_e32 v67, 1, v14
v_cmp_eq_u32_e32 vcc, 1, v67
v_cndmask_b32_e64 v69, 0, 1.0, vcc
v_mul_f32_e32 v77, v70, v70
v_mul_f32_e32 v67, v77, v77
v_mul_f32_e32 v67, v69, v67
s_mov_b32 m0, -1
v_mul_f32_e32 v78, v77, v67
ds_read_b64 v[67:68], v54
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v71, v38, v38
v_mov_b32_e32 v72, 0x3a92b707
v_mov_b32_e32 v79, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v68, v66, v68
v_mul_f32_e32 v68, v78, v68
v_mad_f32 v67, v65, v67, -v68
v_madak_f32_e32 v68, v72, v71, 0x3ded3cb2
v_mov_b32_e32 v72, 0x3c739487
v_madak_f32_e32 v72, v72, v71, 0x3f01e2bc
v_mad_f32 v68, v68, v71, 1.0
v_mac_f32_e32 v68, v38, v72
v_mov_b32_e32 v72, 0xb2951928
v_madak_f32_e32 v72, v72, v71, 0xb85ffb93
v_madak_f32_e32 v79, v79, v71, 0x3a83ca0c
v_madak_f32_e32 v72, v72, v71, 0xbc9ded90
v_madak_f32_e32 v79, v79, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v72, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v79
v_rcp_f32_e32 v38, v68
v_mul_f32_e32 v68, v69, v77
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mac_f32_e32 v38, v70, v68
v_mul_f32_e32 v68, v77, v78
v_mul_f32_e32 v69, v67, v68
v_mad_f32 v70, v46, v38, -v69
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v67, -v38
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
v_mul_f32_e64 v72, v70, -v30
v_mul_f32_e64 v69, v70, -v22
v_mul_f32_e64 v67, v70, -v26
BB4_14: ; %Flow1186
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
; BB4_15: restore the lanes saved in s[14:15], then gate the next pair on
; bit 1 of v63. Lanes whose bit is clear are parked in s[14:15]; if no lane
; remains active the whole pair is skipped (branch to BB4_19).
BB4_15: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 1, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; s[14:15] = old exec, exec &= vcc; the xor leaves s[14:15] holding exactly
; the lanes that were just disabled.
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_19
s_cbranch_execz BB4_19
; BB4_16: load four dwords from LDS, form the squared distance to
; (v73, v74, v75) and test it against a per-lane threshold.
BB4_16: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: v77..v80 come from v53 + 128 bytes.
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
; s[18:19] = (exec & s[0:1]) | (exec & s[2:3]) | (v62 != v18)
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v26 = v78 - v74, v22 = v77 - v73, v30 = v79 - v75.
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 in lanes selected by s[18:19], else 0; multiplied by s5 below, so
; unselected lanes get a threshold of 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
; v38 = v26^2 + v22^2 + v30^2
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the rest are parked in s[18:19].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_18
s_cbranch_execz BB4_18
; BB4_17: pair-interaction body for mask bit 1 of v14. Same arithmetic shape
; as BB4_13, but here the results are added to v72/v69/v67 and to the
; separate accumulator set v37/v36/v35.
BB4_17: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 1 of v14 is set, else 0.
v_lshrrev_b32_e32 v68, 1, v14
v_and_b32_e32 v68, 1, v68
; Clamp v38 from below to 0x34cd15ae (about 3.82e-7) before the rsq.
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * (clamped v38): argument of the polynomials below.
v_mul_f32_e32 v38, v47, v38
; v78 = v68^2, v79 = v77 * v78^3.
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
; Two dwords from LDS at v54 + 64 bytes -> v70, v71.
ds_read_b64 v[70:71], v54 offset:64
; v80 (fourth dword of the ds_read2_b64 in BB4_16) is consumed here, after
; which v80 is reused as a polynomial temporary.
v_mul_f32_e32 v46, v76, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v70 = v65*lds[0] - v79*(v66*lds[1]).
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
; v71 = v38^2; denominator polynomial accumulates in v80:
; v80 = 1.0 + v71*(0x3ded3cb2 + 0x3a92b707*v71) + v38*(0x3f01e2bc + 0x3c739487*v71)
v_mul_f32_e32 v71, v38, v38
v_madak_f32_e32 v80, v80, v71, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v71, 0x3f01e2bc
v_mad_f32 v80, v80, v71, 1.0
v_mac_f32_e32 v80, v38, v81
; Numerator polynomial in v38 (even part via v81, odd part via v82) -> v71.
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v71, 0xb85ffb93
v_madak_f32_e32 v82, v82, v71, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v71, 0xbc9ded90
v_madak_f32_e32 v82, v82, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v81, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v82
; v38 = v51 * numerator / denominator + v68 * (v77 * v78)
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mul_f32_e32 v71, v77, v78
v_mac_f32_e32 v38, v68, v71
; v71 = v46*v38 - v70*(v78*v79); v38 = the same quantity with opposite sign.
v_mul_f32_e32 v68, v78, v79
v_mul_f32_e32 v71, v70, v68
v_mad_f32 v71, v46, v38, -v71
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v70, -v38
; Add v38 * (v30, v26, v22) to the shared accumulators v72/v69/v67 ...
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
; ... and v71 * (v30, v26, v22) to v37/v36/v35.
v_mad_f32 v37, v30, v71, v37
v_mad_f32 v36, v26, v71, v36
v_mac_f32_e32 v35, v22, v71
; Join point: re-enable lanes parked by the cutoff test in BB4_16.
BB4_18: ; %Flow1185
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB4_19: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 2, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_23
s_cbranch_execz BB4_23
BB4_20: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_22
s_cbranch_execz BB4_22
BB4_21: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v68, 2, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:128
v_mul_f32_e32 v46, v76, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v38, v38
v_madak_f32_e32 v80, v80, v71, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v71, 0x3f01e2bc
v_mad_f32 v80, v80, v71, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v71, 0xb85ffb93
v_madak_f32_e32 v82, v82, v71, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v71, 0xbc9ded90
v_madak_f32_e32 v82, v82, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v81, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mul_f32_e32 v71, v77, v78
v_mac_f32_e32 v38, v68, v71
v_mul_f32_e32 v68, v78, v79
v_mul_f32_e32 v71, v70, v68
v_mad_f32 v71, v46, v38, -v71
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v70, -v38
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v29, v30, v71, v29
v_mad_f32 v28, v26, v71, v28
v_mac_f32_e32 v27, v22, v71
BB4_22: ; %Flow1184
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB4_23: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 3, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_27
s_cbranch_execz BB4_27
BB4_24: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_26
s_cbranch_execz BB4_26
BB4_25: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v68, 3, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:192
v_mul_f32_e32 v46, v76, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v38, v38
v_madak_f32_e32 v80, v80, v71, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v71, 0x3f01e2bc
v_mad_f32 v80, v80, v71, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v71, 0xb85ffb93
v_madak_f32_e32 v82, v82, v71, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v71, 0xbc9ded90
v_madak_f32_e32 v82, v82, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v81, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mul_f32_e32 v71, v77, v78
v_mac_f32_e32 v38, v68, v71
v_mul_f32_e32 v68, v78, v79
v_mul_f32_e32 v71, v70, v68
v_mad_f32 v71, v46, v38, -v71
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v70, -v38
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v25, v30, v71, v25
v_mad_f32 v24, v26, v71, v24
v_mac_f32_e32 v23, v22, v71
BB4_26: ; %Flow1183
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB4_27: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 4, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_31
s_cbranch_execz BB4_31
BB4_28: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_30
s_cbranch_execz BB4_30
BB4_29: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v68, 4, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:256
v_mul_f32_e32 v46, v76, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v38, v38
v_madak_f32_e32 v80, v80, v71, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v71, 0x3f01e2bc
v_mad_f32 v80, v80, v71, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v71, 0xb85ffb93
v_madak_f32_e32 v82, v82, v71, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v71, 0xbc9ded90
v_madak_f32_e32 v82, v82, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v81, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mul_f32_e32 v71, v77, v78
v_mac_f32_e32 v38, v68, v71
v_mul_f32_e32 v68, v78, v79
v_mul_f32_e32 v71, v70, v68
v_mad_f32 v71, v46, v38, -v71
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v70, -v38
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v21, v30, v71, v21
v_mad_f32 v20, v26, v71, v20
v_mac_f32_e32 v19, v22, v71
BB4_30: ; %Flow1182
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB4_31: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 5, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_35
s_cbranch_execz BB4_35
BB4_32: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_34
s_cbranch_execz BB4_34
BB4_33: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v68, 5, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:320
v_mul_f32_e32 v46, v76, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v38, v38
v_madak_f32_e32 v80, v80, v71, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v71, 0x3f01e2bc
v_mad_f32 v80, v80, v71, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v71, 0xb85ffb93
v_madak_f32_e32 v82, v82, v71, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v71, 0xbc9ded90
v_madak_f32_e32 v82, v82, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v81, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mul_f32_e32 v71, v77, v78
v_mac_f32_e32 v38, v68, v71
v_mul_f32_e32 v68, v78, v79
v_mul_f32_e32 v71, v70, v68
v_mad_f32 v71, v46, v38, -v71
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v70, -v38
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v17, v30, v71, v17
v_mad_f32 v16, v26, v71, v16
v_mac_f32_e32 v15, v22, v71
BB4_34: ; %Flow1181
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB4_35: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 6, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_39
s_cbranch_execz BB4_39
BB4_36: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_38
s_cbranch_execz BB4_38
BB4_37: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v68, 6, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:384
v_mul_f32_e32 v46, v76, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v38, v38
v_madak_f32_e32 v80, v80, v71, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v71, 0x3f01e2bc
v_mad_f32 v80, v80, v71, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v71, 0xb85ffb93
v_madak_f32_e32 v82, v82, v71, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v71, 0xbc9ded90
v_madak_f32_e32 v82, v82, v71, 0x3d8eaf3b
v_madak_f32_e32 v71, v81, v71, 0xbf409397
v_mac_f32_e32 v71, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v71, v38
v_mul_f32_e32 v71, v77, v78
v_mac_f32_e32 v38, v68, v71
v_mul_f32_e32 v68, v78, v79
v_mul_f32_e32 v71, v70, v68
v_mad_f32 v71, v46, v38, -v71
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v68, v70, -v38
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v13, v30, v71, v13
v_mad_f32 v12, v26, v71, v12
v_mac_f32_e32 v11, v22, v71
BB4_38: ; %Flow1180
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB4_39: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 7, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_43
s_cbranch_execz BB4_43
BB4_40: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v22, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v18, v73, v77
v_mul_f32_e32 v30, v22, v22
v_cndmask_b32_e64 v38, 0, 1.0, s[18:19]
v_subrev_f32_e32 v26, v75, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_42
s_cbranch_execz BB4_42
BB4_41: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v46, 7, v14
v_and_b32_e32 v46, 1, v46
v_max_f32_e32 v30, 0x34cd15ae, v30
v_cmp_eq_u32_e32 vcc, 1, v46
v_rsq_f32_e32 v46, v30
v_cndmask_b32_e64 v68, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v30, v47, v30
v_mul_f32_e32 v73, v46, v46
v_mul_f32_e32 v70, v73, v73
v_mul_f32_e32 v70, v68, v70
v_mul_f32_e32 v74, v73, v70
ds_read_b64 v[70:71], v54 offset:448
v_mov_b32_e32 v75, 0x35c55945
v_mul_f32_e32 v38, v76, v80
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v66, v66, v71
v_mul_f32_e32 v66, v74, v66
v_mad_f32 v65, v65, v70, -v66
v_mul_f32_e32 v66, v30, v30
v_mov_b32_e32 v70, 0x3a92b707
v_madak_f32_e32 v70, v70, v66, 0x3ded3cb2
v_mov_b32_e32 v71, 0x3c739487
v_madak_f32_e32 v71, v71, v66, 0x3f01e2bc
v_mad_f32 v70, v70, v66, 1.0
v_mac_f32_e32 v70, v30, v71
v_mov_b32_e32 v71, 0xb2951928
v_madak_f32_e32 v71, v71, v66, 0xb85ffb93
v_madak_f32_e32 v75, v75, v66, 0x3a83ca0c
v_madak_f32_e32 v71, v71, v66, 0xbc9ded90
v_madak_f32_e32 v75, v75, v66, 0x3d8eaf3b
v_madak_f32_e32 v66, v71, v66, 0xbf409397
v_mac_f32_e32 v66, v30, v75
v_rcp_f32_e32 v30, v70
v_mul_f32_e32 v30, v51, v30
v_mul_f32_e32 v30, v66, v30
v_mul_f32_e32 v66, v68, v73
v_mac_f32_e32 v30, v46, v66
v_mul_f32_e32 v46, v73, v74
v_mul_f32_e32 v66, v65, v46
v_mad_f32 v66, v38, v30, -v66
v_mul_f32_e32 v30, v30, v38
v_mad_f32 v30, v46, v65, -v30
v_mad_f32 v72, v26, v30, v72
v_mad_f32 v69, v22, v30, v69
v_mac_f32_e32 v67, v18, v30
v_mad_f32 v10, v26, v66, v10
v_mad_f32 v9, v22, v66, v9
v_mac_f32_e32 v8, v18, v66
BB4_42: ; %Flow1179
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
; BB4_43: end of the eight unrolled pair tests. Every lane stores its
; v67/v69/v72 accumulators to LDS (addresses v5/v6/v7); only lanes with
; v2 < 3 (signed) continue into the read-back and global-update path.
BB4_43: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
s_mov_b32 m0, -1
; vcc = (3 > v2), evaluated before exec is narrowed.
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v5, v67
ds_write_b32 v6, v69
ds_write_b32 v7, v72
; s[18:19] receives the lanes being disabled; it is or-ed back into exec at BB4_49.
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[18:19], exec, s[14:15]
; Wait for the LDS stores above to complete before the reads in BB4_44.
; NOTE(review): no s_barrier is visible here - presumably all lanes involved
; are in the same wavefront; confirm.
s_waitcnt lgkmcnt(0)
; mask branch BB4_49
s_cbranch_execz BB4_49
; BB4_44: read back values from LDS and sum them into v18.
; Base address v26 = s4 + 4 * (v0 + (v2 << 6)).
BB4_44: ; in Loop: Header=BB4_7 Depth=1
v_lshlrev_b32_e32 v22, 6, v2
v_add_i32_e32 v18, vcc, v0, v22
v_lshlrev_b32_e32 v18, 2, v18
v_add_i32_e32 v26, vcc, s4, v18
s_mov_b32 m0, -1
; First term of the sum.
ds_read_b32 v18, v26
; Guard: (v0 | 1) < (v0 + 8), signed compare. Lanes failing it keep only the
; first term.
v_add_i32_e32 v30, vcc, 8, v0
v_or_b32_e32 v38, 1, v0
v_cmp_lt_i32_e32 vcc, v38, v30
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
s_waitcnt lgkmcnt(0)
; mask branch BB4_46
s_cbranch_execz BB4_46
; BB4_45: add seven more LDS dwords to v18.
BB4_45: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; ds_read2_b32 offsets are in 4-byte units: dwords 1 and 2 after v26.
ds_read2_b32 v[65:66], v26 offset0:1 offset1:2
; Second base address v22 = s4 + 4 * ((v0 | 3) + (v2 << 6)).
v_or_b32_e32 v30, 3, v0
v_add_i32_e32 v22, vcc, v30, v22
v_lshlrev_b32_e32 v22, 2, v22
; Dwords 3 and 4 after v26.
ds_read2_b32 v[67:68], v26 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v65
v_add_i32_e32 v22, vcc, s4, v22
v_add_f32_e32 v18, v66, v18
; Dwords 2 and 3 after the second base, and the dword at v26 + 28 bytes.
; NOTE(review): these equal dwords 5, 6 and 7 after v26 only when the low two
; bits of v0 are clear - confirm against the caller.
ds_read2_b32 v[65:66], v22 offset0:2 offset1:3
ds_read_b32 v26, v26 offset:28
v_add_f32_e32 v18, v67, v18
v_add_f32_e32 v18, v68, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v65, v18
v_add_f32_e32 v18, v66, v18
v_add_f32_e32 v18, v26, v18
; BB4_46: add the per-lane sum in v18 to a 32-bit float in buffer memory using
; a compare-and-swap retry loop. This region is reached only by lanes that
; passed the "v2 < 3" guard at BB4_43.
BB4_46: ; %._crit_edge.i118
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[14:15]
; Element index = v64 * 3 + v2, sign-extended to 64 bits; byte offset
; v[66:67] = index << 2 (4-byte elements).
v_mul_lo_i32 v22, v64, 3
v_mov_b32_e32 v26, s13
; Build the descriptor s[12:15] for the initial load: upper half copied from s[30:31].
s_mov_b64 s[14:15], s[30:31]
v_add_i32_e32 v64, vcc, v22, v2
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 2
; v[64:65] = s[12:13] + byte offset (64-bit add with carry); this is the
; address operand of the atomic below.
v_add_i32_e32 v64, vcc, s12, v66
v_addc_u32_e32 v65, vcc, v67, v26, vcc
; Initial read of the current memory value into v67.
buffer_load_dword v67, v[66:67], s[12:15], 0 addr64
; s[14:15] is reused as the "lanes finished" mask for the loop and starts empty.
s_mov_b64 s[14:15], 0
s_waitcnt vmcnt(0)
; Retry loop: v67 = expected old value, v66 = v18 + v67 = desired new value.
BB4_47: ; Parent Loop BB4_7 Depth=1
; => This Inner Loop Header: Depth=2
v_add_f32_e32 v66, v18, v67
; cmpswap data pair: v68 = value to store, v69 = value to compare against.
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v68, v66
; glc: the value found in memory before the operation is returned in v68.
buffer_atomic_cmpswap v[68:69], v[64:65], s[28:31], 0 addr64 glc
; v22 is written twice back-to-back; the first value is immediately overwritten.
v_mov_b32_e32 v22, -1
v_mov_b32_e32 v22, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; A lane succeeded if the returned value equals what it expected.
v_cmp_eq_u32_e32 vcc, v68, v67
s_or_b64 s[14:15], vcc, s[14:15]
; Failed lanes retry with the freshly observed value as the new expectation.
v_mov_b32_e32 v67, v68
; Drop finished lanes from exec; loop while any lane is still active.
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_47
; BB#48: ; %Flow1177
; in Loop: Header=BB4_7 Depth=1
; Re-enable all lanes that completed the loop.
s_or_b64 exec, exec, s[14:15]
; Re-enable the lanes parked in s[18:19] by the guard at BB4_43.
BB4_49: ; %Flow1178
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
; BB4_50: rejoin lanes saved in s[34:35], then test bits 8..15 of v63. If no
; lane has any of those bits set, the whole second group of pair tests is
; skipped (branch to BB4_90).
BB4_50: ; %Flow1187
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[34:35]
v_and_b32_e32 v18, 0xff00, v63
v_cmp_ne_u32_e32 vcc, 0, v18
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_90
s_cbranch_execz BB4_90
; BB4_51: fetch the per-lane inputs for the second group and start the bit-8 test.
BB4_51: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; v18 = dword from LDS at v52 + 4 bytes; it is later compared against
; v40/v62/v61/... in the per-bit blocks.
ds_read_b32 v18, v52 offset:4
; Descriptor s[36:39]: base from s[16:17], upper half from s[30:31].
s_mov_b64 s[36:37], s[16:17]
s_mov_b64 s[38:39], s[30:31]
s_waitcnt lgkmcnt(0)
; Element index v64 = v18 * 8 + v1, sign-extended to 64 bits.
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
; 16-byte elements: load four dwords into v67..v70. The per-bit blocks
; subtract v67/v68/v69 from LDS values and multiply by v70.
v_lshl_b64 v[66:67], v[64:65], 4
buffer_load_dwordx4 v[67:70], v[66:67], s[36:39], 0 addr64
; 8-byte elements from the buffer based at s[20:21], same index: two dwords
; into v65, v66.
v_lshl_b64 v[71:72], v[64:65], 3
s_mov_b64 s[36:37], s[20:21]
buffer_load_dwordx2 v[65:66], v[71:72], s[36:39], 0 addr64
; Gate on bit 8 of v63 and zero the accumulators v71/v73/v76 for this group.
v_lshrrev_b32_e32 v22, 8, v63
v_mov_b32_e32 v71, 0
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; Both buffer loads must have landed before any path below uses v65..v70.
s_waitcnt vmcnt(0)
; mask branch BB4_55
s_cbranch_execz BB4_55
BB4_52: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset1:1
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v26, v67, v77
v_mul_f32_e32 v38, v22, v22
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v26, v26
v_mov_b32_e32 v71, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_54
s_cbranch_execz BB4_54
BB4_53: ; in Loop: Header=BB4_7 Depth=1
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v74, v38
v_lshrrev_b32_e32 v71, 8, v14
v_and_b32_e32 v71, 1, v71
v_cmp_eq_u32_e32 vcc, 1, v71
v_mul_f32_e32 v77, v74, v74
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v71, v77, v77
v_mul_f32_e32 v71, v73, v71
s_mov_b32 m0, -1
v_mul_f32_e32 v78, v77, v71
ds_read_b64 v[71:72], v54
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v75, v38, v38
v_mov_b32_e32 v76, 0x3a92b707
v_mov_b32_e32 v79, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v72, v66, v72
v_mul_f32_e32 v72, v78, v72
v_mad_f32 v71, v65, v71, -v72
v_madak_f32_e32 v72, v76, v75, 0x3ded3cb2
v_mov_b32_e32 v76, 0x3c739487
v_madak_f32_e32 v76, v76, v75, 0x3f01e2bc
v_mad_f32 v72, v72, v75, 1.0
v_mac_f32_e32 v72, v38, v76
v_mov_b32_e32 v76, 0xb2951928
v_madak_f32_e32 v76, v76, v75, 0xb85ffb93
v_madak_f32_e32 v79, v79, v75, 0x3a83ca0c
v_madak_f32_e32 v76, v76, v75, 0xbc9ded90
v_madak_f32_e32 v79, v79, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v76, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v79
v_rcp_f32_e32 v38, v72
v_mul_f32_e32 v72, v73, v77
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mac_f32_e32 v38, v74, v72
v_mul_f32_e32 v72, v77, v78
v_mul_f32_e32 v73, v71, v72
v_mad_f32 v74, v46, v38, -v73
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v71, -v38
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
v_mul_f32_e64 v76, v74, -v30
v_mul_f32_e64 v73, v74, -v22
v_mul_f32_e64 v71, v74, -v26
BB4_54: ; %Flow1175
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB4_55: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 9, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_59
s_cbranch_execz BB4_59
BB4_56: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_58
s_cbranch_execz BB4_58
BB4_57: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v72, 9, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:64
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v37, v30, v75, v37
v_mad_f32 v36, v26, v75, v36
v_mac_f32_e32 v35, v22, v75
BB4_58: ; %Flow1174
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB4_59: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 10, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_63
s_cbranch_execz BB4_63
BB4_60: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_62
s_cbranch_execz BB4_62
BB4_61: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v72, 10, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:128
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v29, v30, v75, v29
v_mad_f32 v28, v26, v75, v28
v_mac_f32_e32 v27, v22, v75
BB4_62: ; %Flow1173
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB4_63: ; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 11, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_67
s_cbranch_execz BB4_67
BB4_64: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_66
s_cbranch_execz BB4_66
BB4_65: ; in Loop: Header=BB4_7 Depth=1
v_lshrrev_b32_e32 v72, 11, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:192
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v25, v30, v75, v25
v_mad_f32 v24, v26, v75, v24
v_mac_f32_e32 v23, v22, v75
; BB4_66..BB4_70: unrolled pair step selected by bit 12 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_66: ; %Flow1172
; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[22:23] by an earlier test
s_or_b64 exec, exec, s[22:23]
BB4_67: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by an earlier test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 12 of v63
v_lshrrev_b32_e32 v22, 12, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_71)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_71
s_cbranch_execz BB4_71
BB4_68: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 512 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v59 != v18)
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_70
s_cbranch_execz BB4_70
BB4_69: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 12 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 12, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 256 into v74, v75
ds_read_b64 v[74:75], v54 offset:256
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v19/v20/v21 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v21, v30, v75, v21
v_mad_f32 v20, v26, v75, v20
v_mac_f32_e32 v19, v22, v75
BB4_70: ; %Flow1171
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_71..BB4_74: unrolled pair step selected by bit 13 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_71: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 13 of v63
v_lshrrev_b32_e32 v22, 13, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_75)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_75
s_cbranch_execz BB4_75
BB4_72: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 640 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v58 != v18)
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_74
s_cbranch_execz BB4_74
BB4_73: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 13 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 13, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 320 into v74, v75
ds_read_b64 v[74:75], v54 offset:320
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v15/v16/v17 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v17, v30, v75, v17
v_mad_f32 v16, v26, v75, v16
v_mac_f32_e32 v15, v22, v75
BB4_74: ; %Flow1170
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_75..BB4_78: unrolled pair step selected by bit 14 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_75: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 14 of v63
v_lshrrev_b32_e32 v22, 14, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_79)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_79
s_cbranch_execz BB4_79
BB4_76: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 768 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v57 != v18)
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_78
s_cbranch_execz BB4_78
BB4_77: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 14 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 14, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 384 into v74, v75
ds_read_b64 v[74:75], v54 offset:384
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v11/v12/v13 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v13, v30, v75, v13
v_mad_f32 v12, v26, v75, v12
v_mac_f32_e32 v11, v22, v75
BB4_78: ; %Flow1169
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_79..BB4_82: unrolled pair step selected by bit 15 of v63.
; Same shape as the steps for bits 12..14, but with a different register
; assignment: v18 and v65..v70 are overwritten as scratch inside this step.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_79: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 15 of v63
v_lshrrev_b32_e32 v22, 15, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_83)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_83
s_cbranch_execz BB4_83
BB4_80: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 896 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v33 != v18)
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v18 = v77 - v67, v22 = v78 - v68, v26 = v79 - v69; v30 = sum of their squares
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v18, v67, v77
v_mul_f32_e32 v30, v22, v22
; v38 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v38, 0, 1.0, s[22:23]
v_subrev_f32_e32 v26, v69, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
; continue only in lanes with v30 < v38 (cannot pass where v38 is 0.0)
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_82
s_cbranch_execz BB4_82
BB4_81: ; in Loop: Header=BB4_7 Depth=1
; v69 = 1.0 if bit 15 of v14 is set, else 0.0
; v30 is clamped to at least 0x34cd15ae (~3.82e-7); v46 = rinv = 1/sqrt(v30)
v_lshrrev_b32_e32 v46, 15, v14
v_and_b32_e32 v46, 1, v46
v_max_f32_e32 v30, 0x34cd15ae, v30
v_cmp_eq_u32_e32 vcc, 1, v46
v_rsq_f32_e32 v46, v30
; v38 = v70 * v80 (product of the fourth loaded components), taken before v70 is reused
v_mul_f32_e32 v38, v70, v80
v_cndmask_b32_e64 v69, 0, 1.0, vcc
s_mov_b32 m0, -1
; v70 = rinv^2; v72 = v69 * rinv^6
v_mul_f32_e32 v70, v46, v46
v_mul_f32_e32 v67, v70, v70
v_mul_f32_e32 v67, v69, v67
v_mul_f32_e32 v72, v70, v67
; load 8 bytes from LDS at v54 + 448 into v67, v68
ds_read_b64 v[67:68], v54 offset:448
; v30 = v47 * v30
v_mul_f32_e32 v30, v47, v30
v_mov_b32_e32 v74, 0x35c55945
s_waitcnt lgkmcnt(0)
; v65 = v65*v67 - v72*(v66*v68)
v_mul_f32_e32 v66, v66, v68
v_mul_f32_e32 v66, v72, v66
v_mad_f32 v65, v65, v67, -v66
; v66 = v30^2; v67 = denominator and v66 = numerator, both polynomials in v30
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v66, v30, v30
v_mov_b32_e32 v67, 0x3a92b707
v_madak_f32_e32 v67, v67, v66, 0x3ded3cb2
v_mov_b32_e32 v68, 0x3c739487
v_madak_f32_e32 v68, v68, v66, 0x3f01e2bc
v_mad_f32 v67, v67, v66, 1.0
v_mac_f32_e32 v67, v30, v68
v_mov_b32_e32 v68, 0xb2951928
v_madak_f32_e32 v68, v68, v66, 0xb85ffb93
v_madak_f32_e32 v74, v74, v66, 0x3a83ca0c
v_madak_f32_e32 v68, v68, v66, 0xbc9ded90
v_madak_f32_e32 v74, v74, v66, 0x3d8eaf3b
v_madak_f32_e32 v66, v68, v66, 0xbf409397
v_mac_f32_e32 v66, v30, v74
; v30 = v51 * numerator / denominator + v69 * rinv^3
v_rcp_f32_e32 v30, v67
v_mul_f32_e32 v30, v51, v30
v_mul_f32_e32 v30, v66, v30
v_mul_f32_e32 v66, v69, v70
v_mac_f32_e32 v30, v46, v66
; v66 = v38*v30 - v65*(v69*rinv^8); v30 = the same terms with opposite sign
v_mul_f32_e32 v46, v70, v72
v_mul_f32_e32 v66, v65, v46
v_mad_f32 v66, v38, v30, -v66
v_mul_f32_e32 v30, v30, v38
v_mad_f32 v30, v46, v65, -v30
; accumulate: v71/v73/v76 += (v18,v22,v26)*v30; v8/v9/v10 += (v18,v22,v26)*v66
v_mad_f32 v76, v26, v30, v76
v_mad_f32 v73, v22, v30, v73
v_mac_f32_e32 v71, v18, v30
v_mad_f32 v10, v26, v66, v10
v_mad_f32 v9, v22, v66, v9
v_mac_f32_e32 v8, v18, v66
BB4_82: ; %Flow1168
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_83..BB4_85: spill the three accumulators v71/v73/v76 to LDS, then in
; lanes with v2 < 3 sum eight dwords read back from LDS into v18.
; NOTE(review): looks like a per-component partial-sum reduction ahead of
; the global update that follows - confirm against the kernel source.
BB4_83: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
s_mov_b32 m0, -1
; vcc = (3 > v2), signed
v_cmp_gt_i32_e32 vcc, 3, v2
; store v71, v73, v76 to the LDS addresses held in v5, v6, v7
ds_write_b32 v5, v71
ds_write_b32 v6, v73
ds_write_b32 v7, v76
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_89)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
s_waitcnt lgkmcnt(0)
; mask branch BB4_89
s_cbranch_execz BB4_89
BB4_84: ; in Loop: Header=BB4_7 Depth=1
; v26 = s4 + 4*(v0 + 64*v2): LDS byte address of the first term; v18 = that dword
v_lshlrev_b32_e32 v22, 6, v2
v_add_i32_e32 v18, vcc, v0, v22
v_lshlrev_b32_e32 v18, 2, v18
v_add_i32_e32 v26, vcc, s4, v18
s_mov_b32 m0, -1
ds_read_b32 v18, v26
; guard for the remaining terms: (v0 | 1) < (v0 + 8), signed
v_add_i32_e32 v30, vcc, 8, v0
v_or_b32_e32 v38, 1, v0
v_cmp_lt_i32_e32 vcc, v38, v30
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
s_waitcnt lgkmcnt(0)
; mask branch BB4_86
s_cbranch_execz BB4_86
BB4_85: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; add the dwords at v26+4 and v26+8
ds_read2_b32 v[65:66], v26 offset0:1 offset1:2
; v22 = s4 + 4*((v0 | 3) + 64*v2): second base address
; (equals v26 + 12 when the low two bits of v0 are clear - TODO confirm)
v_or_b32_e32 v30, 3, v0
v_add_i32_e32 v22, vcc, v30, v22
v_lshlrev_b32_e32 v22, 2, v22
; add the dwords at v26+12 and v26+16
ds_read2_b32 v[67:68], v26 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v65
v_add_i32_e32 v22, vcc, s4, v22
v_add_f32_e32 v18, v66, v18
; add the dwords at v22+8, v22+12 and v26+28
ds_read2_b32 v[65:66], v22 offset0:2 offset1:3
ds_read_b32 v26, v26 offset:28
v_add_f32_e32 v18, v67, v18
v_add_f32_e32 v18, v68, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v65, v18
v_add_f32_e32 v18, v66, v18
v_add_f32_e32 v18, v26, v18
; BB4_86..BB4_89: add the float in v18 to a dword in global memory using a
; compare-and-swap retry loop (float atomic add built from
; buffer_atomic_cmpswap).
;   element index = v64*3 + v2, byte offset = index*4, base = s[12:13]
;   v67 = last value observed in memory, v66 = v67 + v18 = value to install
; A lane leaves the loop once the value returned by the atomic equals the
; value it expected; s[22:23] collects finished lanes.
; NOTE(review): presumably this accumulates into a force output buffer -
; confirm against the kernel source.
BB4_86: ; %._crit_edge.i72
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that skipped the tail of the LDS sum
s_or_b64 exec, exec, s[22:23]
; v64 = v64*3 + v2, sign-extended into v[64:65]; v[66:67] = byte offset (index << 2)
v_mul_lo_i32 v22, v64, 3
v_mov_b32_e32 v26, s13
; buffer resource s[36:39] = { s[12:13], s[30:31] } for the initial load
s_mov_b64 s[36:37], s[12:13]
s_mov_b64 s[38:39], s[30:31]
v_add_i32_e32 v64, vcc, v22, v2
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 2
; v[64:65] = s[12:13] + byte offset (64-bit add with carry): address used by the atomic
v_add_i32_e32 v64, vcc, s12, v66
v_addc_u32_e32 v65, vcc, v67, v26, vcc
; v67 = current memory value (first guess for the compare)
buffer_load_dword v67, v[66:67], s[36:39], 0 addr64
; s[22:23] = set of lanes that have completed the update (none yet)
s_mov_b64 s[22:23], 0
s_waitcnt vmcnt(0)
BB4_87: ; Parent Loop BB4_7 Depth=1
; => This Inner Loop Header: Depth=2
; v68 = new value (v18 + observed), v69 = expected old value
v_add_f32_e32 v66, v18, v67
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v68, v66
; glc: the pre-operation memory value is returned in v68
buffer_atomic_cmpswap v[68:69], v[64:65], s[28:31], 0 addr64 glc
; NOTE(review): v22 is not read inside this loop; this write is kept because
; v22 may be live at a label outside this span - confirm before removing.
v_mov_b32_e32 v22, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; lanes whose returned value matches the expected one are done
v_cmp_eq_u32_e32 vcc, v68, v67
s_or_b64 s[22:23], vcc, s[22:23]
; remaining lanes retry with the value they just observed
v_mov_b32_e32 v67, v68
s_andn2_b64 exec, exec, s[22:23]
s_cbranch_execnz BB4_87
; BB#88: ; %Flow1166
; in Loop: Header=BB4_7 Depth=1
; switch all lanes that took part in the loop back on
s_or_b64 exec, exec, s[22:23]
BB4_89: ; %Flow1167
; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the (3 > v2) test
s_or_b64 exec, exec, s[18:19]
; BB4_90..BB4_91: close the region guarded by s[14:15], then start the next
; group of eight unrolled steps if any of bits 16..23 of v63 is set.
; BB4_91 loads the per-group inputs (v[67:70], v[65:66]) from global memory
; and zeroes the accumulators v71/v73/v76.
; NOTE(review): presumably the coordinate/charge and parameter data of the
; next partner cluster - confirm against the kernel source.
BB4_90: ; %Flow1176
; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[14:15] by an earlier test
s_or_b64 exec, exec, s[14:15]
; vcc = (v63 & 0xff0000) != 0
v_and_b32_e32 v18, 0xff0000, v63
v_cmp_ne_u32_e32 vcc, 0, v18
; exec &= vcc; s[14:15] = lanes disabled here
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_130
s_cbranch_execz BB4_130
BB4_91: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; v18 = dword from LDS at v52 + 8
ds_read_b32 v18, v52 offset:8
; buffer resource s[36:39] = { s[16:17], s[30:31] }
s_mov_b64 s[36:37], s[16:17]
s_mov_b64 s[38:39], s[30:31]
s_waitcnt lgkmcnt(0)
; v64 = v18*8 + v1, sign-extended into v[64:65]
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
; v[67:70] = 16-byte element number v64 from the s[16:17] buffer
v_lshl_b64 v[66:67], v[64:65], 4
buffer_load_dwordx4 v[67:70], v[66:67], s[36:39], 0 addr64
; v[65:66] = 8-byte element number v64 from the s[20:21] buffer
v_lshl_b64 v[71:72], v[64:65], 3
s_mov_b64 s[36:37], s[20:21]
buffer_load_dwordx2 v[65:66], v[71:72], s[36:39], 0 addr64
; vcc = bit 16 of v63; v71 = v73 = v76 = 0
v_lshrrev_b32_e32 v22, 16, v63
v_mov_b32_e32 v71, 0
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_95)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
s_waitcnt vmcnt(0)
; mask branch BB4_95
s_cbranch_execz BB4_95
; BB4_92..BB4_94: first unrolled pair step of the group, selected by bit 16
; of v63 (bit test done at the end of BB4_91). Unlike the later steps it
; writes v71/v73/v76 instead of adding to them.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_92: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 into v77..v80
ds_read2_b64 v[77:80], v53 offset1:1
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v40 != v18)
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v26 = v77 - v67, v22 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v26, v67, v77
v_mul_f32_e32 v38, v22, v22
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v26, v26
; v71 = v73 = v76 = 0 for lanes that fail the distance test
v_mov_b32_e32 v71, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_94
s_cbranch_execz BB4_94
BB4_93: ; in Loop: Header=BB4_7 Depth=1
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v74 = rinv = 1/sqrt(v38)
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v74, v38
; v73 = 1.0 if bit 16 of v14 is set, else 0.0
v_lshrrev_b32_e32 v71, 16, v14
v_and_b32_e32 v71, 1, v71
v_cmp_eq_u32_e32 vcc, 1, v71
; v77 = rinv^2; v78 = v73 * rinv^6
v_mul_f32_e32 v77, v74, v74
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v71, v77, v77
v_mul_f32_e32 v71, v73, v71
s_mov_b32 m0, -1
v_mul_f32_e32 v78, v77, v71
; load 8 bytes from LDS at v54 into v71, v72
ds_read_b64 v[71:72], v54
; v38 = v47 * v38; v75 = v38^2
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v75, v38, v38
v_mov_b32_e32 v76, 0x3a92b707
v_mov_b32_e32 v79, 0x35c55945
s_waitcnt lgkmcnt(0)
; v71 = v65*v71 - v78*(v66*v72)
v_mul_f32_e32 v72, v66, v72
v_mul_f32_e32 v72, v78, v72
v_mad_f32 v71, v65, v71, -v72
; v72 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_madak_f32_e32 v72, v76, v75, 0x3ded3cb2
v_mov_b32_e32 v76, 0x3c739487
v_madak_f32_e32 v76, v76, v75, 0x3f01e2bc
v_mad_f32 v72, v72, v75, 1.0
v_mac_f32_e32 v72, v38, v76
v_mov_b32_e32 v76, 0xb2951928
v_madak_f32_e32 v76, v76, v75, 0xb85ffb93
v_madak_f32_e32 v79, v79, v75, 0x3a83ca0c
v_madak_f32_e32 v76, v76, v75, 0xbc9ded90
v_madak_f32_e32 v79, v79, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v76, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v79
; v38 = v51 * numerator / denominator + v73 * rinv^3
v_rcp_f32_e32 v38, v72
v_mul_f32_e32 v72, v73, v77
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mac_f32_e32 v38, v74, v72
; v74 = v46*v38 - v71*(v73*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v77, v78
v_mul_f32_e32 v73, v71, v72
v_mad_f32 v74, v46, v38, -v73
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v71, -v38
; v43/v44/v45 += -(v26,v22,v30)*v38; v71/v73/v76 = -(v26,v22,v30)*v74
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
v_mul_f32_e64 v76, v74, -v30
v_mul_f32_e64 v73, v74, -v22
v_mul_f32_e64 v71, v74, -v26
BB4_94: ; %Flow1164
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_95..BB4_98: unrolled pair step selected by bit 17 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_95: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 17 of v63
v_lshrrev_b32_e32 v22, 17, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_99)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_99
s_cbranch_execz BB4_99
BB4_96: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 128 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v62 != v18)
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_98
s_cbranch_execz BB4_98
BB4_97: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 17 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 17, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 64 into v74, v75
ds_read_b64 v[74:75], v54 offset:64
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v35/v36/v37 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v37, v30, v75, v37
v_mad_f32 v36, v26, v75, v36
v_mac_f32_e32 v35, v22, v75
BB4_98: ; %Flow1163
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_99..BB4_102: unrolled pair step selected by bit 18 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_99: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 18 of v63
v_lshrrev_b32_e32 v22, 18, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_103)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_103
s_cbranch_execz BB4_103
BB4_100: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 256 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v61 != v18)
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_102
s_cbranch_execz BB4_102
BB4_101: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 18 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 18, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 128 into v74, v75
ds_read_b64 v[74:75], v54 offset:128
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v27/v28/v29 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v29, v30, v75, v29
v_mad_f32 v28, v26, v75, v28
v_mac_f32_e32 v27, v22, v75
BB4_102: ; %Flow1162
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_103..BB4_106: unrolled pair step selected by bit 19 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_103: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 19 of v63
v_lshrrev_b32_e32 v22, 19, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_107)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_107
s_cbranch_execz BB4_107
BB4_104: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 384 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v60 != v18)
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_106
s_cbranch_execz BB4_106
BB4_105: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 19 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 19, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 192 into v74, v75
ds_read_b64 v[74:75], v54 offset:192
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v23/v24/v25 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v25, v30, v75, v25
v_mad_f32 v24, v26, v75, v24
v_mac_f32_e32 v23, v22, v75
BB4_106: ; %Flow1161
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_107..BB4_110: unrolled pair step selected by bit 20 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_107: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 20 of v63
v_lshrrev_b32_e32 v22, 20, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_111)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_111
s_cbranch_execz BB4_111
BB4_108: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 512 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v59 != v18)
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_110
s_cbranch_execz BB4_110
BB4_109: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 20 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 20, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 256 into v74, v75
ds_read_b64 v[74:75], v54 offset:256
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v19/v20/v21 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v21, v30, v75, v21
v_mad_f32 v20, v26, v75, v20
v_mac_f32_e32 v19, v22, v75
BB4_110: ; %Flow1160
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_111..BB4_114: unrolled pair step selected by bit 21 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_111: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 21 of v63
v_lshrrev_b32_e32 v22, 21, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_115)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_115
s_cbranch_execz BB4_115
BB4_112: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 640 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v58 != v18)
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_114
s_cbranch_execz BB4_114
BB4_113: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 21 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 21, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 320 into v74, v75
ds_read_b64 v[74:75], v54 offset:320
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v15/v16/v17 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v17, v30, v75, v17
v_mad_f32 v16, v26, v75, v16
v_mac_f32_e32 v15, v22, v75
BB4_114: ; %Flow1159
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_115..BB4_118: unrolled pair step selected by bit 22 of v63.
; Lanes whose bit is clear, or whose squared distance fails the threshold
; test, skip the arithmetic; s[18:19] / s[22:23] hold the lanes to switch
; back on afterwards.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_115: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 22 of v63
v_lshrrev_b32_e32 v22, 22, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_119)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_119
s_cbranch_execz BB4_119
BB4_116: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 768 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v57 != v18)
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = sum of their squares
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; continue only in lanes with v38 < v46 (cannot pass where v46 is 0.0)
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_118
s_cbranch_execz BB4_118
BB4_117: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 22 of v14 is set, else 0.0
; v38 is clamped to at least 0x34cd15ae (~3.82e-7); v72 = rinv = 1/sqrt(v38)
v_lshrrev_b32_e32 v72, 22, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v47 * v38; v78 = rinv^2; v79 = v77 * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; load 8 bytes from LDS at v54 + 384 into v74, v75
ds_read_b64 v[74:75], v54 offset:384
; v46 = v70 * v80 (product of the fourth loaded components)
v_mul_f32_e32 v46, v70, v80
; seed coefficients for the rational polynomial evaluated below
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; v75 = v38^2; v80 = denominator and v75 = numerator, both polynomials in v38
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + v77 * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v75 = v46*v38 - v74*(v77*rinv^8); v38 = the same terms with opposite sign
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; accumulate: v71/v73/v76 += (v22,v26,v30)*v38; v11/v12/v13 += (v22,v26,v30)*v75
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v13, v30, v75, v13
v_mad_f32 v12, v26, v75, v12
v_mac_f32_e32 v11, v22, v75
BB4_118: ; %Flow1158
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; BB4_119..BB4_122: unrolled pair step selected by bit 23 of v63.
; Same shape as the steps for bits 17..22, but with a different register
; assignment: v18 and v65..v70 are overwritten as scratch inside this step.
; NOTE(review): the physical meaning of the operands is not shown in this
; span; presumably a GROMACS non-bonded pair kernel (per the log header) - confirm.
BB4_119: ; in Loop: Header=BB4_7 Depth=1
; switch back on the lanes parked in s[18:19] by the preceding bit test
s_or_b64 exec, exec, s[18:19]
; vcc = bit 23 of v63
v_lshrrev_b32_e32 v22, 23, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes disabled here (OR-ed back at BB4_123)
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_123
s_cbranch_execz BB4_123
BB4_120: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; load 16 bytes from LDS at v53 + 896 into v77..v80
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v33 != v18)
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v18 = v77 - v67, v22 = v78 - v68, v26 = v79 - v69; v30 = sum of their squares
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v18, v67, v77
v_mul_f32_e32 v30, v22, v22
; v38 = 1.0 where s[22:23] is set, else 0.0; scaled by s5 below
v_cndmask_b32_e64 v38, 0, 1.0, s[22:23]
v_subrev_f32_e32 v26, v69, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
; continue only in lanes with v30 < v38 (cannot pass where v38 is 0.0)
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_122
s_cbranch_execz BB4_122
BB4_121: ; in Loop: Header=BB4_7 Depth=1
; v69 = 1.0 if bit 23 of v14 is set, else 0.0
; v30 is clamped to at least 0x34cd15ae (~3.82e-7); v46 = rinv = 1/sqrt(v30)
v_lshrrev_b32_e32 v46, 23, v14
v_and_b32_e32 v46, 1, v46
v_max_f32_e32 v30, 0x34cd15ae, v30
v_cmp_eq_u32_e32 vcc, 1, v46
v_rsq_f32_e32 v46, v30
; v38 = v70 * v80 (product of the fourth loaded components), taken before v70 is reused
v_mul_f32_e32 v38, v70, v80
v_cndmask_b32_e64 v69, 0, 1.0, vcc
s_mov_b32 m0, -1
; v70 = rinv^2; v72 = v69 * rinv^6
v_mul_f32_e32 v70, v46, v46
v_mul_f32_e32 v67, v70, v70
v_mul_f32_e32 v67, v69, v67
v_mul_f32_e32 v72, v70, v67
; load 8 bytes from LDS at v54 + 448 into v67, v68
ds_read_b64 v[67:68], v54 offset:448
; v30 = v47 * v30
v_mul_f32_e32 v30, v47, v30
v_mov_b32_e32 v74, 0x35c55945
s_waitcnt lgkmcnt(0)
; v65 = v65*v67 - v72*(v66*v68)
v_mul_f32_e32 v66, v66, v68
v_mul_f32_e32 v66, v72, v66
v_mad_f32 v65, v65, v67, -v66
; v66 = v30^2; v67 = denominator and v66 = numerator, both polynomials in v30
; NOTE(review): presumably an Ewald-type correction term - confirm against the kernel source
v_mul_f32_e32 v66, v30, v30
v_mov_b32_e32 v67, 0x3a92b707
v_madak_f32_e32 v67, v67, v66, 0x3ded3cb2
v_mov_b32_e32 v68, 0x3c739487
v_madak_f32_e32 v68, v68, v66, 0x3f01e2bc
v_mad_f32 v67, v67, v66, 1.0
v_mac_f32_e32 v67, v30, v68
v_mov_b32_e32 v68, 0xb2951928
v_madak_f32_e32 v68, v68, v66, 0xb85ffb93
v_madak_f32_e32 v74, v74, v66, 0x3a83ca0c
v_madak_f32_e32 v68, v68, v66, 0xbc9ded90
v_madak_f32_e32 v74, v74, v66, 0x3d8eaf3b
v_madak_f32_e32 v66, v68, v66, 0xbf409397
v_mac_f32_e32 v66, v30, v74
; v30 = v51 * numerator / denominator + v69 * rinv^3
v_rcp_f32_e32 v30, v67
v_mul_f32_e32 v30, v51, v30
v_mul_f32_e32 v30, v66, v30
v_mul_f32_e32 v66, v69, v70
v_mac_f32_e32 v30, v46, v66
; v66 = v38*v30 - v65*(v69*rinv^8); v30 = the same terms with opposite sign
v_mul_f32_e32 v46, v70, v72
v_mul_f32_e32 v66, v65, v46
v_mad_f32 v66, v38, v30, -v66
v_mul_f32_e32 v30, v30, v38
v_mad_f32 v30, v46, v65, -v30
; accumulate: v71/v73/v76 += (v18,v22,v26)*v30; v8/v9/v10 += (v18,v22,v26)*v66
v_mad_f32 v76, v26, v30, v76
v_mad_f32 v73, v22, v30, v73
v_mac_f32_e32 v71, v18, v30
v_mad_f32 v10, v26, v66, v10
v_mad_f32 v9, v22, v66, v9
v_mac_f32_e32 v8, v18, v66
BB4_122: ; %Flow1157
; in Loop: Header=BB4_7 Depth=1
; switch back on lanes that failed the distance test
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Publish v71/v73/v76 to LDS, then lanes with v2 < 3 sum eight LDS values
; and add the result to a float in global memory with a compare-and-swap loop.
; The global element index is v64*3 + v2 (4-byte elements, base s[12:13]).
; NOTE(review): presumably v2 selects the x/y/z component and v64 is an atom
; index -- confirm against the kernel source.
; ---------------------------------------------------------------------------
BB4_123: ; in Loop: Header=BB4_7 Depth=1
; Join of the outer (bit 23) guard.
s_or_b64 exec, exec, s[18:19]
s_mov_b32 m0, -1
; vcc = (v2 < 3), written as 3 > v2 (signed)
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v5, v71
ds_write_b32 v6, v73
ds_write_b32 v7, v76
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; Make sure the three LDS writes above have completed before reading back.
s_waitcnt lgkmcnt(0)
; mask branch BB4_129
s_cbranch_execz BB4_129
BB4_124: ; in Loop: Header=BB4_7 Depth=1
; v26 = s4 + 4*(v0 + 64*v2): LDS byte address of the first value to sum.
v_lshlrev_b32_e32 v22, 6, v2
v_add_i32_e32 v18, vcc, v0, v22
v_lshlrev_b32_e32 v18, 2, v18
v_add_i32_e32 v26, vcc, s4, v18
s_mov_b32 m0, -1
; v18 = running sum, seeded with the first value.
ds_read_b32 v18, v26
; Guard: (v0 | 1) < v0 + 8 (signed compare).
v_add_i32_e32 v30, vcc, 8, v0
v_or_b32_e32 v38, 1, v0
v_cmp_lt_i32_e32 vcc, v38, v30
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
s_waitcnt lgkmcnt(0)
; mask branch BB4_126
s_cbranch_execz BB4_126
BB4_125: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; ds_read2_b32 offsets are in dwords: reads v26+4 and v26+8.
ds_read2_b32 v[65:66], v26 offset0:1 offset1:2
; v22 = s4 + 4*((v0 | 3) + 64*v2); when the low two bits of v0 are clear
; this equals v26 + 12.
v_or_b32_e32 v30, 3, v0
v_add_i32_e32 v22, vcc, v30, v22
v_lshlrev_b32_e32 v22, 2, v22
; Reads v26+12 and v26+16.
ds_read2_b32 v[67:68], v26 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v65
v_add_i32_e32 v22, vcc, s4, v22
v_add_f32_e32 v18, v66, v18
; Reads v22+8 and v22+12, then v26+28.
ds_read2_b32 v[65:66], v22 offset0:2 offset1:3
ds_read_b32 v26, v26 offset:28
v_add_f32_e32 v18, v67, v18
v_add_f32_e32 v18, v68, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v65, v18
v_add_f32_e32 v18, v66, v18
v_add_f32_e32 v18, v26, v18
BB4_126: ; %._crit_edge.i26
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[22:23]
; Element index v64 = v64*3 + v2, sign-extended to 64 bits in v[64:65].
v_mul_lo_i32 v22, v64, 3
v_mov_b32_e32 v26, s13
; Buffer resource for the initial load: base s[12:13], upper half s[30:31].
s_mov_b64 s[36:37], s[12:13]
s_mov_b64 s[38:39], s[30:31]
v_add_i32_e32 v64, vcc, v22, v2
v_ashrrev_i32_e32 v65, 31, v64
; v[66:67] = byte offset (index * 4); v[64:65] = s[12:13] + byte offset.
v_lshl_b64 v[66:67], v[64:65], 2
v_add_i32_e32 v64, vcc, s12, v66
v_addc_u32_e32 v65, vcc, v67, v26, vcc
; v67 = current value of the target float (the "expected" value for the CAS).
buffer_load_dword v67, v[66:67], s[36:39], 0 addr64
; s[22:23] accumulates the lanes whose CAS has succeeded.
s_mov_b64 s[22:23], 0
s_waitcnt vmcnt(0)
BB4_127: ; Parent Loop BB4_7 Depth=1
; => This Inner Loop Header: Depth=2
; CAS retry loop implementing a floating-point atomic add:
;   v68 = new value (v18 + expected), v69 = expected value.
v_add_f32_e32 v66, v18, v67
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v68, v66
; With glc set the pre-operation memory value is returned in v68.
buffer_atomic_cmpswap v[68:69], v[64:65], s[28:31], 0 addr64 glc
; NOTE(review): the first of these two writes to v22 is overwritten
; immediately, and no use of v22 is visible before it is redefined in
; BB4_131; both look like compiler leftovers.
v_mov_b32_e32 v22, -1
v_mov_b32_e32 v22, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; Success when the returned old value equals what we expected.
v_cmp_eq_u32_e32 vcc, v68, v67
s_or_b64 s[22:23], vcc, s[22:23]
; Failed lanes retry with the freshly observed value as the new expectation.
v_mov_b32_e32 v67, v68
; Drop finished lanes from exec; loop while any lane is still active.
s_andn2_b64 exec, exec, s[22:23]
s_cbranch_execnz BB4_127
; BB#128: ; %Flow1155
; in Loop: Header=BB4_7 Depth=1
; Re-enable the lanes that finished the CAS loop.
s_or_b64 exec, exec, s[22:23]
BB4_129: ; %Flow1156
; in Loop: Header=BB4_7 Depth=1
; Join of the v2 < 3 guard.
s_or_b64 exec, exec, s[18:19]
; ---------------------------------------------------------------------------
; Guard for the group of computations selected by bits 24..31 of v63, plus
; the loads shared by that group:
;   v18      <- LDS dword at v52 + 12
;   v[67:70] <- 16-byte element number (8*v18 + v1) from the buffer at s[16:17]
;   v[65:66] <-  8-byte element number (8*v18 + v1) from the buffer at s[20:21]
; Also zeroes the accumulators v71/v73/v76 and opens the bit-24 guard.
; NOTE(review): presumably v[67:70] is a position+charge vector and v[65:66]
; an LJ parameter pair -- confirm against the kernel source.
; ---------------------------------------------------------------------------
BB4_130: ; %Flow1165
; in Loop: Header=BB4_7 Depth=1
; Join of an earlier guard whose saved mask lives in s[14:15].
s_or_b64 exec, exec, s[14:15]
; Unsigned v63 > 0x00ffffff, i.e. at least one of bits 24..31 is set.
v_mov_b32_e32 v18, 0xffffff
v_cmp_lt_u32_e32 vcc, v18, v63
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB4_170
s_cbranch_execz BB4_170
BB4_131: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read_b32 v18, v52 offset:12
s_mov_b64 s[36:37], s[16:17]
s_mov_b64 s[38:39], s[30:31]
s_waitcnt lgkmcnt(0)
; v64 = 8*v18 + v1, sign-extended to 64 bits in v[64:65].
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
; Byte offset = index * 16 for the dwordx4 load.
v_lshl_b64 v[66:67], v[64:65], 4
buffer_load_dwordx4 v[67:70], v[66:67], s[36:39], 0 addr64
; Byte offset = index * 8 for the dwordx2 load (same index, other buffer).
v_lshl_b64 v[71:72], v[64:65], 3
s_mov_b64 s[36:37], s[20:21]
buffer_load_dwordx2 v[65:66], v[71:72], s[36:39], 0 addr64
; vcc = (bit 24 of v63 == 1); v71 = v73 = v76 = 0.0
v_lshrrev_b32_e32 v22, 24, v63
v_mov_b32_e32 v71, 0
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; Both buffer loads must have landed before the guarded code uses them.
s_waitcnt vmcnt(0)
; mask branch BB4_135
s_cbranch_execz BB4_135
; ---------------------------------------------------------------------------
; Pair computation for bit 24 of v63 (the outer guard was opened by the
; preceding block; its saved mask is in s[18:19]).
; First of the eight bit-selected computations in this group: it SETS
; v71/v73/v76 rather than accumulating into them, and accumulates the
; opposite-signed contribution into v43/v44/v45.
; LDS inputs: 2 x 64 bits at v53 + 0 / + 8, 64 bits at v54 + 0.
; ---------------------------------------------------------------------------
BB4_132: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset1:1
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v40 != v18)
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v26 = v77 - v67, v22 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v26, v67, v77
v_mul_f32_e32 v38, v22, v22
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v26, v26
; v71/v73/v76 are zeroed again here so lanes failing the distance test
; leave this block with zero in all three.
v_mov_b32_e32 v71, 0
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_134
s_cbranch_execz BB4_134
BB4_133: ; in Loop: Header=BB4_7 Depth=1
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7), v74 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v74, v38
; v73 = 1.0 if bit 24 of v14 is set, else 0.0
v_lshrrev_b32_e32 v71, 24, v14
v_and_b32_e32 v71, 1, v71
v_cmp_eq_u32_e32 vcc, 1, v71
; v77 = rinv^2, v78 = flag * rinv^6
v_mul_f32_e32 v77, v74, v74
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v71, v77, v77
v_mul_f32_e32 v71, v73, v71
s_mov_b32 m0, -1
v_mul_f32_e32 v78, v77, v71
ds_read_b64 v[71:72], v54
; z = v38 = v47 * r^2, z2 = v75 = z*z
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v75, v38, v38
v_mov_b32_e32 v76, 0x3a92b707
v_mov_b32_e32 v79, 0x35c55945
s_waitcnt lgkmcnt(0)
; v71 = v65*v71 - (flag*rinv^6) * (v66*v72)
v_mul_f32_e32 v72, v66, v72
v_mul_f32_e32 v72, v78, v72
v_mad_f32 v71, v65, v71, -v72
; Denominator v72 and numerator v75 of a rational polynomial in z
; (v_madak computes src0*src1 + literal).
v_madak_f32_e32 v72, v76, v75, 0x3ded3cb2
v_mov_b32_e32 v76, 0x3c739487
v_madak_f32_e32 v76, v76, v75, 0x3f01e2bc
v_mad_f32 v72, v72, v75, 1.0
v_mac_f32_e32 v72, v38, v76
v_mov_b32_e32 v76, 0xb2951928
v_madak_f32_e32 v76, v76, v75, 0xb85ffb93
v_madak_f32_e32 v79, v79, v75, 0x3a83ca0c
v_madak_f32_e32 v76, v76, v75, 0xbc9ded90
v_madak_f32_e32 v79, v79, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v76, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v79
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v72
v_mul_f32_e32 v72, v73, v77
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mac_f32_e32 v38, v74, v72
; v72 = flag * rinv^8
v_mul_f32_e32 v72, v77, v78
; v74 = v46*v38 - v71*v72 and v38 = -(v74)
v_mul_f32_e32 v73, v71, v72
v_mad_f32 v74, v46, v38, -v73
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v71, -v38
; v43/v44/v45 -= v38 * (v26, v22, v30)
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
; v71/v73/v76 = -v74 * (v26, v22, v30): assignment, not accumulation.
v_mul_f32_e64 v76, v74, -v30
v_mul_f32_e64 v73, v74, -v22
v_mul_f32_e64 v71, v74, -v26
BB4_134: ; %Flow1153
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 25 of v63.
;   outer guard: bit 25 of v63 set             (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag  (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 16*8 / + 17*8, 64 bits at v54 + 64.
; Accumulates into v71/v73/v76 and, with opposite sign, v35/v36/v37.
; ---------------------------------------------------------------------------
BB4_135: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 24) outer guard.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 25, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_139
s_cbranch_execz BB4_139
BB4_136: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v62 != v18)
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_138
s_cbranch_execz BB4_138
BB4_137: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 25 of v14 is set, else 0.0
v_lshrrev_b32_e32 v72, 25, v14
v_and_b32_e32 v72, 1, v72
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7); v72 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v38 = v47 * r^2; v78 = rinv^2; v79 = flag * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:64
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - (flag*rinv^6) * (v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z2 = v75; denominator -> v80, numerator -> v75 (rational polynomial in z).
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v72 = flag * rinv^8; v75 = v46*v38 - v74*v72; v38 = -(v75)
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v37, v30, v75, v37
v_mad_f32 v36, v26, v75, v36
v_mac_f32_e32 v35, v22, v75
BB4_138: ; %Flow1152
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 26 of v63.
;   outer guard: bit 26 of v63 set             (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag  (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 32*8 / + 33*8, 64 bits at v54 + 128.
; Accumulates into v71/v73/v76 and, with opposite sign, v27/v28/v29.
; ---------------------------------------------------------------------------
BB4_139: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 25) outer guard.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 26, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_143
s_cbranch_execz BB4_143
BB4_140: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v61 != v18)
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_142
s_cbranch_execz BB4_142
BB4_141: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 26 of v14 is set, else 0.0
v_lshrrev_b32_e32 v72, 26, v14
v_and_b32_e32 v72, 1, v72
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7); v72 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v38 = v47 * r^2; v78 = rinv^2; v79 = flag * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:128
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - (flag*rinv^6) * (v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z2 = v75; denominator -> v80, numerator -> v75 (rational polynomial in z).
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v72 = flag * rinv^8; v75 = v46*v38 - v74*v72; v38 = -(v75)
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v29, v30, v75, v29
v_mad_f32 v28, v26, v75, v28
v_mac_f32_e32 v27, v22, v75
BB4_142: ; %Flow1151
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 27 of v63.
;   outer guard: bit 27 of v63 set             (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag  (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 48*8 / + 49*8, 64 bits at v54 + 192.
; Accumulates into v71/v73/v76 and, with opposite sign, v23/v24/v25.
; ---------------------------------------------------------------------------
BB4_143: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 26) outer guard.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 27, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_147
s_cbranch_execz BB4_147
BB4_144: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v60 != v18)
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_146
s_cbranch_execz BB4_146
BB4_145: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 27 of v14 is set, else 0.0
v_lshrrev_b32_e32 v72, 27, v14
v_and_b32_e32 v72, 1, v72
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7); v72 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v38 = v47 * r^2; v78 = rinv^2; v79 = flag * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:192
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - (flag*rinv^6) * (v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z2 = v75; denominator -> v80, numerator -> v75 (rational polynomial in z).
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v72 = flag * rinv^8; v75 = v46*v38 - v74*v72; v38 = -(v75)
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v25, v30, v75, v25
v_mad_f32 v24, v26, v75, v24
v_mac_f32_e32 v23, v22, v75
BB4_146: ; %Flow1150
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 28 of v63.
;   outer guard: bit 28 of v63 set             (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag  (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 64*8 / + 65*8, 64 bits at v54 + 256.
; Accumulates into v71/v73/v76 and, with opposite sign, v19/v20/v21.
; ---------------------------------------------------------------------------
BB4_147: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 27) outer guard.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 28, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_151
s_cbranch_execz BB4_151
BB4_148: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v59 != v18)
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_150
s_cbranch_execz BB4_150
BB4_149: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 28 of v14 is set, else 0.0
v_lshrrev_b32_e32 v72, 28, v14
v_and_b32_e32 v72, 1, v72
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7); v72 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v38 = v47 * r^2; v78 = rinv^2; v79 = flag * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:256
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - (flag*rinv^6) * (v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z2 = v75; denominator -> v80, numerator -> v75 (rational polynomial in z).
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v72 = flag * rinv^8; v75 = v46*v38 - v74*v72; v38 = -(v75)
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v21, v30, v75, v21
v_mad_f32 v20, v26, v75, v20
v_mac_f32_e32 v19, v22, v75
BB4_150: ; %Flow1149
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 29 of v63.
;   outer guard: bit 29 of v63 set             (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag  (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 80*8 / + 81*8, 64 bits at v54 + 320.
; Accumulates into v71/v73/v76 and, with opposite sign, v15/v16/v17.
; ---------------------------------------------------------------------------
BB4_151: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 28) outer guard.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 29, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_155
s_cbranch_execz BB4_155
BB4_152: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v58 != v18)
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_154
s_cbranch_execz BB4_154
BB4_153: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 29 of v14 is set, else 0.0
v_lshrrev_b32_e32 v72, 29, v14
v_and_b32_e32 v72, 1, v72
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7); v72 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v38 = v47 * r^2; v78 = rinv^2; v79 = flag * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:320
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - (flag*rinv^6) * (v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z2 = v75; denominator -> v80, numerator -> v75 (rational polynomial in z).
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v72 = flag * rinv^8; v75 = v46*v38 - v74*v72; v38 = -(v75)
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v17, v30, v75, v17
v_mad_f32 v16, v26, v75, v16
v_mac_f32_e32 v15, v22, v75
BB4_154: ; %Flow1148
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 30 of v63.
;   outer guard: bit 30 of v63 set             (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag  (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 96*8 / + 97*8, 64 bits at v54 + 384.
; Accumulates into v71/v73/v76 and, with opposite sign, v11/v12/v13.
; ---------------------------------------------------------------------------
BB4_155: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 29) outer guard.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 30, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_159
s_cbranch_execz BB4_159
BB4_156: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v57 != v18)
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69; v38 = squared length.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; Inner guard: v38 < s5 * flag
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_158
s_cbranch_execz BB4_158
BB4_157: ; in Loop: Header=BB4_7 Depth=1
; v77 = 1.0 if bit 30 of v14 is set, else 0.0
v_lshrrev_b32_e32 v72, 30, v14
v_and_b32_e32 v72, 1, v72
; Clamp r^2 from below (0x34cd15ae is about 3.8e-7); v72 = 1/sqrt(r^2).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v38 = v47 * r^2; v78 = rinv^2; v79 = flag * rinv^6
v_mul_f32_e32 v38, v47, v38
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:384
; v46 = v70 * v80 (product of the 4th components)
v_mul_f32_e32 v46, v70, v80
v_mov_b32_e32 v80, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_mov_b32_e32 v82, 0x35c55945
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - (flag*rinv^6) * (v66*v75)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z2 = v75; denominator -> v80, numerator -> v75 (rational polynomial in z).
v_mul_f32_e32 v75, v38, v38
v_madak_f32_e32 v80, v80, v75, 0x3ded3cb2
v_madak_f32_e32 v81, v81, v75, 0x3f01e2bc
v_mad_f32 v80, v80, v75, 1.0
v_mac_f32_e32 v80, v38, v81
v_mov_b32_e32 v81, 0xb2951928
v_madak_f32_e32 v81, v81, v75, 0xb85ffb93
v_madak_f32_e32 v82, v82, v75, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v75, 0xbc9ded90
v_madak_f32_e32 v82, v82, v75, 0x3d8eaf3b
v_madak_f32_e32 v75, v81, v75, 0xbf409397
v_mac_f32_e32 v75, v38, v82
; v38 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v38, v80
v_mul_f32_e32 v38, v51, v38
v_mul_f32_e32 v38, v75, v38
v_mul_f32_e32 v75, v77, v78
v_mac_f32_e32 v38, v72, v75
; v72 = flag * rinv^8; v75 = v46*v38 - v74*v72; v38 = -(v75)
v_mul_f32_e32 v72, v78, v79
v_mul_f32_e32 v75, v74, v72
v_mad_f32 v75, v46, v38, -v75
v_mul_f32_e32 v38, v38, v46
v_mad_f32 v38, v72, v74, -v38
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v13, v30, v75, v13
v_mad_f32 v12, v26, v75, v12
v_mac_f32_e32 v11, v22, v75
BB4_158: ; %Flow1147
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Pair computation for bit 31 of v63 (tested as "v63 is negative").
;   outer guard: v63 < 0                        (saved mask in s[18:19])
;   inner guard: squared distance < s5 * flag   (saved mask in s[22:23])
; LDS inputs: 2 x 64 bits at v53 + 112*8 / + 113*8, 64 bits at v54 + 448.
; Accumulates into v71/v73/v76 and, with opposite sign, v8/v9/v10.
; Last computation of this group: v14, v18, v63 and v65..v70 are reused as
; scratch here and no longer hold their earlier values afterwards.
; ---------------------------------------------------------------------------
BB4_159: ; in Loop: Header=BB4_7 Depth=1
; Join of the previous (bit 30) outer guard.
s_or_b64 exec, exec, s[18:19]
; 0 > v63 (signed) is true exactly when bit 31 of v63 is set.
v_cmp_gt_i32_e32 vcc, 0, v63
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB4_163
s_cbranch_execz BB4_163
BB4_160: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
; s[22:23] = (exec & s[0:1]) | (exec & s[2:3]) | (v33 != v18)
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[34:35], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v18 = v77 - v67, v22 = v78 - v68, v26 = v79 - v69; v30 = squared length.
; (v18 is overwritten here; the compare above was its last use as an index.)
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[34:35], s[22:23]
v_subrev_f32_e32 v18, v67, v77
v_mul_f32_e32 v30, v22, v22
v_cndmask_b32_e64 v38, 0, 1.0, s[22:23]
v_subrev_f32_e32 v26, v69, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
; Inner guard: v30 < s5 * flag
v_mul_f32_e32 v38, s5, v38
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB4_162
s_cbranch_execz BB4_162
BB4_161: ; in Loop: Header=BB4_7 Depth=1
; vcc = bit 31 of v14; v14 is then overwritten with the clamped r^2.
v_cmp_gt_i32_e32 vcc, 0, v14
v_max_f32_e32 v14, 0x34cd15ae, v30
; v30 = 1/sqrt(r^2); v46 = 1.0 if the v14 bit was set, else 0.0
v_rsq_f32_e32 v30, v14
v_cndmask_b32_e64 v46, 0, 1.0, vcc
s_mov_b32 m0, -1
; z = v14 = v47 * r^2; v63 = rinv^2 (v63 stops being the bit mask here);
; v69 = flag * rinv^6
v_mul_f32_e32 v14, v47, v14
v_mul_f32_e32 v63, v30, v30
v_mul_f32_e32 v67, v63, v63
v_mul_f32_e32 v67, v46, v67
v_mul_f32_e32 v69, v63, v67
ds_read_b64 v[67:68], v54 offset:448
; v38 = v70 * v80 (product of the 4th components); v70 is then reused.
v_mul_f32_e32 v38, v70, v80
v_mov_b32_e32 v70, 0x35c55945
; v46 = flag * rinv^2
v_mul_f32_e32 v46, v46, v63
s_waitcnt lgkmcnt(0)
; v65 = v65*v67 - (flag*rinv^6) * (v66*v68)
v_mul_f32_e32 v66, v66, v68
v_mul_f32_e32 v66, v69, v66
v_mad_f32 v65, v65, v67, -v66
; z2 = v66; denominator -> v67, numerator -> v66 (rational polynomial in z).
v_mul_f32_e32 v66, v14, v14
v_mov_b32_e32 v67, 0x3a92b707
v_madak_f32_e32 v67, v67, v66, 0x3ded3cb2
v_mov_b32_e32 v68, 0x3c739487
v_madak_f32_e32 v68, v68, v66, 0x3f01e2bc
v_mad_f32 v67, v67, v66, 1.0
v_mac_f32_e32 v67, v14, v68
v_mov_b32_e32 v68, 0xb2951928
v_madak_f32_e32 v68, v68, v66, 0xb85ffb93
v_madak_f32_e32 v70, v70, v66, 0x3a83ca0c
v_madak_f32_e32 v68, v68, v66, 0xbc9ded90
v_madak_f32_e32 v70, v70, v66, 0x3d8eaf3b
v_madak_f32_e32 v66, v68, v66, 0xbf409397
v_mac_f32_e32 v66, v14, v70
; v14 = v51 * numerator / denominator + flag * rinv^3
v_rcp_f32_e32 v14, v67
v_mul_f32_e32 v14, v51, v14
v_mul_f32_e32 v14, v66, v14
v_mac_f32_e32 v14, v30, v46
; v30 = flag * rinv^8; v46 = v38*v14 - v65*v30; v14 = -(v46)
v_mul_f32_e32 v30, v63, v69
v_mul_f32_e32 v46, v65, v30
v_mad_f32 v46, v38, v14, -v46
v_mul_f32_e32 v14, v14, v38
v_mad_f32 v14, v30, v65, -v14
; Accumulate scalar * difference vector into both accumulator sets.
v_mad_f32 v76, v26, v14, v76
v_mad_f32 v73, v22, v14, v73
v_mac_f32_e32 v71, v18, v14
v_mad_f32 v10, v26, v46, v10
v_mad_f32 v9, v22, v46, v9
v_mac_f32_e32 v8, v18, v46
BB4_162: ; %Flow1146
; in Loop: Header=BB4_7 Depth=1
; Join of the inner (distance) guard.
s_or_b64 exec, exec, s[22:23]
; ---------------------------------------------------------------------------
; Publish v71/v73/v76 to LDS, then lanes with v2 < 3 sum eight LDS values
; and add the result to a float in global memory with a compare-and-swap loop.
; Same shape as the sequence ending the bits 16..23 group, with a different
; register assignment (sum in v14, address in v[63:64]).
; The global element index is v64*3 + v2 (4-byte elements, base s[12:13]).
; ---------------------------------------------------------------------------
BB4_163: ; in Loop: Header=BB4_7 Depth=1
; Join of the outer (bit 31) guard.
s_or_b64 exec, exec, s[18:19]
s_mov_b32 m0, -1
; vcc = (v2 < 3), written as 3 > v2 (signed)
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v5, v71
ds_write_b32 v6, v73
ds_write_b32 v7, v76
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; Make sure the three LDS writes above have completed before reading back.
s_waitcnt lgkmcnt(0)
; mask branch BB4_169
s_cbranch_execz BB4_169
BB4_164: ; in Loop: Header=BB4_7 Depth=1
; v22 = s4 + 4*(v0 + 64*v2): LDS byte address of the first value to sum.
v_lshlrev_b32_e32 v18, 6, v2
v_add_i32_e32 v14, vcc, v0, v18
v_lshlrev_b32_e32 v14, 2, v14
v_add_i32_e32 v22, vcc, s4, v14
s_mov_b32 m0, -1
; v14 = running sum, seeded with the first value.
ds_read_b32 v14, v22
; Guard: (v0 | 1) < v0 + 8 (signed compare).
v_add_i32_e32 v26, vcc, 8, v0
v_or_b32_e32 v30, 1, v0
v_cmp_lt_i32_e32 vcc, v30, v26
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
s_waitcnt lgkmcnt(0)
; mask branch BB4_166
s_cbranch_execz BB4_166
BB4_165: ; in Loop: Header=BB4_7 Depth=1
s_mov_b32 m0, -1
; ds_read2_b32 offsets are in dwords: reads v22+4 and v22+8.
ds_read2_b32 v[65:66], v22 offset0:1 offset1:2
; v18 = s4 + 4*((v0 | 3) + 64*v2); when the low two bits of v0 are clear
; this equals v22 + 12.
v_or_b32_e32 v26, 3, v0
v_add_i32_e32 v18, vcc, v26, v18
v_lshlrev_b32_e32 v18, 2, v18
; Reads v22+12 and v22+16.
ds_read2_b32 v[67:68], v22 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v14, v14, v65
v_add_i32_e32 v18, vcc, s4, v18
v_add_f32_e32 v14, v66, v14
; Reads v18+8 and v18+12, then v22+28.
ds_read2_b32 v[65:66], v18 offset0:2 offset1:3
ds_read_b32 v22, v22 offset:28
v_add_f32_e32 v14, v67, v14
v_add_f32_e32 v14, v68, v14
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v14, v65, v14
v_add_f32_e32 v14, v66, v14
v_add_f32_e32 v14, v22, v14
BB4_166: ; %._crit_edge.i
; in Loop: Header=BB4_7 Depth=1
s_or_b64 exec, exec, s[22:23]
; Element index v63 = v64*3 + v2, sign-extended to 64 bits in v[63:64].
v_mul_lo_i32 v18, v64, 3
v_mov_b32_e32 v22, s13
; Buffer resource for the initial load: base s[12:13], upper half s[30:31].
s_mov_b64 s[36:37], s[12:13]
s_mov_b64 s[38:39], s[30:31]
v_add_i32_e32 v63, vcc, v18, v2
v_ashrrev_i32_e32 v64, 31, v63
; v[65:66] = byte offset (index * 4); v[63:64] = s[12:13] + byte offset.
v_lshl_b64 v[65:66], v[63:64], 2
v_add_i32_e32 v63, vcc, s12, v65
v_addc_u32_e32 v64, vcc, v66, v22, vcc
; v66 = current value of the target float (the "expected" value for the CAS).
buffer_load_dword v66, v[65:66], s[36:39], 0 addr64
; s[22:23] accumulates the lanes whose CAS has succeeded.
s_mov_b64 s[22:23], 0
s_waitcnt vmcnt(0)
BB4_167: ; Parent Loop BB4_7 Depth=1
; => This Inner Loop Header: Depth=2
; CAS retry loop implementing a floating-point atomic add:
;   v67 = new value (v14 + expected), v68 = expected value.
v_add_f32_e32 v65, v14, v66
v_mov_b32_e32 v68, v66
v_mov_b32_e32 v67, v65
; With glc set the pre-operation memory value is returned in v67.
buffer_atomic_cmpswap v[67:68], v[63:64], s[28:31], 0 addr64 glc
; NOTE(review): the first of these two writes to v18 is overwritten
; immediately; whether the second is ever read cannot be determined from
; this part of the listing. Both look like compiler leftovers.
v_mov_b32_e32 v18, -1
v_mov_b32_e32 v18, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; Success when the returned old value equals what we expected.
v_cmp_eq_u32_e32 vcc, v67, v66
s_or_b64 s[22:23], vcc, s[22:23]
; Failed lanes retry with the freshly observed value as the new expectation.
v_mov_b32_e32 v66, v67
; Drop finished lanes from exec; loop while any lane is still active.
s_andn2_b64 exec, exec, s[22:23]
s_cbranch_execnz BB4_167
; BB#168: ; %Flow1144
; in Loop: Header=BB4_7 Depth=1
; Re-enable the lanes that finished the CAS loop.
s_or_b64 exec, exec, s[22:23]
BB4_169: ; %Flow1145
; in Loop: Header=BB4_7 Depth=1
; Join of the v2 < 3 guard.
s_or_b64 exec, exec, s[18:19]
; ---------------------------------------------------------------------------
; Tail of the BB4_7 loop body: close the two enclosing guards, advance the
; 64-bit counter in v[55:56] and branch back while v55 != v34.
; ---------------------------------------------------------------------------
BB4_170: ; %Flow1154
; in Loop: Header=BB4_7 Depth=1
; Join of the "any of bits 24..31 set" guard.
s_or_b64 exec, exec, s[14:15]
BB4_171: ; %Flow1188
; in Loop: Header=BB4_7 Depth=1
; Join of a guard opened earlier in the loop body (saved mask in s[32:33]).
s_or_b64 exec, exec, s[32:33]
; v[55:56] += 1 (64-bit add with carry); only the low half is compared.
v_add_i32_e32 v55, vcc, 1, v55
v_addc_u32_e32 v56, vcc, 0, v56, vcc
v_cmp_ne_u32_e32 vcc, v55, v34
; Restrict the comparison result to active lanes, then loop if any is set.
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_7
; ---------------------------------------------------------------------------
; After the BB4_7 loop: publish the first accumulator triple (v43/v44/v45)
; to LDS, synchronise the work-group and set up values shared by the
; reduction sequences that follow:
;   s[2:3] = (dword loaded via s[6:7] at offset 0x32 != 0) && (v32 != 22)
;   s[0:1] = (v1 < 4)        v18 = s4 + 4*v39       v14 = 64*v31
;   v26    = v2 + 64         v22 = v2 + 128         v3  = 0
; Only lanes with v1 < 4 enter the reduction.
; ---------------------------------------------------------------------------
BB4_172: ; %Flow1191
s_mov_b32 m0, -1
ds_write_b32 v5, v43
ds_write_b32 v6, v44
ds_write_b32 v7, v45
; LDS writes must be complete before the barrier releases the other lanes.
s_waitcnt lgkmcnt(0)
s_barrier
s_load_dword s0, s[6:7], 0x32
v_cmp_ne_u32_e32 vcc, 22, v32
v_lshlrev_b32_e32 v18, 2, v39
v_mov_b32_e32 v3, 0
v_lshlrev_b32_e32 v14, 6, v31
; Wait for the scalar load of s0.
s_waitcnt lgkmcnt(0)
v_cmp_ne_u32_e64 s[0:1], s0, 0
s_and_b64 s[2:3], s[0:1], vcc
v_add_i32_e32 v18, vcc, s4, v18
v_add_i32_e32 v26, vcc, 64, v2
v_add_i32_e32 v22, vcc, 0x80, v2
; s[0:1] = (v1 < 4), written as 4 > v1 (signed); reused by the next sequence.
v_cmp_gt_i32_e64 s[0:1], 4, v1
; s[6:7] stops being the load base here and becomes the saved exec mask.
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_183
s_cbranch_execz BB4_183
; ---------------------------------------------------------------------------
; LDS reduction followed by a float atomic add to global memory (the
; compiler's block names call this reduce_force_i_pow2 / atomicAdd_g_f).
; Executed by lanes with v1 < 4.
;   step 1 (all lanes)    : fold values 128 bytes apart, at three LDS rows
;                           (v18, v18+256, v18+512)
;   step 2 (lanes v1 <= 1): fold values 64 bytes apart at the same rows
;   step 3 (lanes v1 <= 2): add two more LDS values, then CAS-add the sum to
;                           element ((v14 + v2)*3 + v1) of the float buffer
;                           at s[12:13]
; v3 ends up holding the lane's sum when s[2:3] is set, otherwise 0.
; ---------------------------------------------------------------------------
BB4_173:
s_mov_b32 m0, -1
; Row 0: [v18] += [v18 + 128]
ds_read_b32 v3, v18 offset:128
ds_read_b32 v30, v18
; v30 (below) = s4 + 4*(v0 + v2 + 64): source address for row 1.
v_add_i32_e32 v31, vcc, v0, v26
v_lshlrev_b32_e32 v31, 2, v31
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v30
ds_write_b32 v18, v3
v_add_i32_e32 v30, vcc, s4, v31
s_waitcnt lgkmcnt(0)
; Row 1: [v18 + 256] += [v30 + 128]
ds_read_b32 v3, v30 offset:128
ds_read_b32 v31, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v31
ds_write_b32 v18, v3 offset:256
s_waitcnt lgkmcnt(0)
; v31 = s4 + 4*(v0 + v2 + 128): source address for row 2.
v_add_i32_e32 v3, vcc, v0, v22
v_lshlrev_b32_e32 v3, 2, v3
v_add_i32_e32 v31, vcc, s4, v3
; Row 2: [v18 + 512] += [v31 + 128]
ds_read_b32 v3, v31 offset:128
ds_read_b32 v32, v18 offset:512
; vcc = (v1 > 1), written as 1 < v1 (signed)
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v32
; v32 is the "continue to step 3" flag; default 0.
v_mov_b32_e32 v32, 0
ds_write_b32 v18, v3 offset:512
s_waitcnt lgkmcnt(0)
; implicit-def: %VGPR3
; "Then" side: lanes with v1 > 1.
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_175
BB4_174:
; v32 = -1 only for v1 == 2; v3 = 0 for all lanes on this side.
v_cmp_eq_u32_e32 vcc, 2, v1
v_mov_b32_e32 v3, 0
v_cndmask_b32_e64 v32, 0, -1, vcc
BB4_175: ; %Flow1141
; Switch exec to the "else" side: lanes with v1 <= 1.
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_177
s_cbranch_execz BB4_177
BB4_176: ; %.thread85.i
s_mov_b32 m0, -1
; Row 0: [v18] += [v18 + 64]
ds_read_b32 v32, v18 offset:64
ds_read_b32 v33, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v18, v32
; Row 1: [v18 + 256] += [v30 + 64]
ds_read_b32 v30, v30 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v32, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v32
ds_write_b32 v18, v30 offset:256
s_waitcnt lgkmcnt(0)
; Row 2: [v18 + 512] += [v31 + 64]
ds_read_b32 v30, v31 offset:64
ds_read_b32 v31, v18 offset:512
; Lanes on this side always continue to step 3.
v_mov_b32_e32 v32, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v31
ds_write_b32 v18, v30 offset:512
s_waitcnt lgkmcnt(0)
BB4_177: ; %Flow1142
; Join of the if/else above; continue only where the flag v32 is non-zero.
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v32
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_182
s_cbranch_execz BB4_182
BB4_178:
; v30 = v18 + 0xe0 * v1 (24-bit multiply): LDS address of the final pair.
v_mov_b32_e32 v30, 0xe0
v_mad_i32_i24 v30, v30, v1, v18
s_mov_b32 m0, -1
v_add_i32_e32 v3, vcc, v14, v2
ds_read_b32 v31, v30
ds_read_b32 v30, v30 offset:32
; v3 = (v14 + v2) * 3
v_mul_lo_i32 v3, v3, 3
; Buffer resource for the initial load: base s[12:13], dword2 = 0,
; dword3 = 0xf000.
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_mov_b64 s[16:17], s[12:13]
s_waitcnt lgkmcnt(0)
; v30 = value to add (sum of the two LDS reads).
v_add_f32_e32 v30, v31, v30
; Element index v31 = v1 + v3, sign-extended; byte offset = index * 4.
v_add_i32_e32 v31, vcc, v1, v3
v_ashrrev_i32_e32 v32, 31, v31
v_lshl_b64 v[33:34], v[31:32], 2
; v[31:32] = s[12:13] + byte offset (full address for the atomic).
v_add_i32_e32 v31, vcc, s12, v33
v_mov_b32_e32 v3, s13
v_addc_u32_e32 v32, vcc, v34, v3, vcc
; v34 = current value of the target float.
buffer_load_dword v34, v[33:34], s[16:19], 0 addr64
; The atomic below uses a zero base in s[16:17] because v[31:32] already
; holds the full address; s[14:15] accumulates the finished lanes.
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_179: ; =>This Inner Loop Header: Depth=1
; CAS retry loop implementing a floating-point atomic add:
;   v38 = new value (v30 + expected), v39 = expected value.
v_add_f32_e32 v33, v30, v34
v_mov_b32_e32 v39, v34
v_mov_b32_e32 v38, v33
; With glc set the pre-operation memory value is returned in v38.
buffer_atomic_cmpswap v[38:39], v[31:32], s[16:19], 0 addr64 glc
; NOTE(review): both writes to v3 are dead -- v3 is set to 0 right after the
; loop exits; they look like compiler leftovers.
v_mov_b32_e32 v3, -1
v_mov_b32_e32 v3, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; Success when the returned old value equals what we expected.
v_cmp_eq_u32_e32 vcc, v38, v34
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v34, v38
; Drop finished lanes from exec; loop while any lane is still active.
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_179
; BB#180: ; %atomicAdd_g_f.exit.i
s_or_b64 exec, exec, s[14:15]
; v3 = s[2:3] ? v30 : 0, implemented with a uniform branch.
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v31
v_mov_b32_e32 v3, 0
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_182
; BB#181:
v_mov_b32_e32 v3, v30
BB4_182: ; %Flow1143
; Join of the step-3 guard.
s_or_b64 exec, exec, s[10:11]
; ---------------------------------------------------------------------------
; End of the first reduction and start of the second, which handles the
; accumulator triple v35/v36/v37. Same steps as the first instance:
;   step 1 (lanes v1 < 4) : fold values 128 bytes apart at rows
;                           v18, v18+256, v18+512
;   step 2 (lanes v1 <= 1): fold values 64 bytes apart at the same rows
; followed by the guard (flag v32 != 0) for the global update.
; ---------------------------------------------------------------------------
BB4_183: ; %reduce_force_i_pow2.exit
; Join of the v1 < 4 guard of the previous reduction.
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
; Everyone must be done reading the LDS scratch before it is overwritten.
s_barrier
ds_write_b32 v5, v35
ds_write_b32 v6, v36
ds_write_b32 v7, v37
s_waitcnt lgkmcnt(0)
s_barrier
; Only lanes with v1 < 4 take part (s[0:1] was computed before the first
; reduction).
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_194
s_cbranch_execz BB4_194
BB4_184:
s_mov_b32 m0, -1
; Row 0: [v18] += [v18 + 128]
ds_read_b32 v30, v18 offset:128
ds_read_b32 v31, v18
v_add_i32_e32 v32, vcc, v0, v26
v_lshlrev_b32_e32 v32, 2, v32
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v31
ds_write_b32 v18, v30
s_waitcnt lgkmcnt(0)
; v30 = s4 + 4*(v0 + v26): source address for row 1.
v_add_i32_e32 v30, vcc, s4, v32
; Row 1: [v18 + 256] += [v30 + 128]
ds_read_b32 v31, v30 offset:128
ds_read_b32 v32, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v31, v31, v32
ds_write_b32 v18, v31 offset:256
s_waitcnt lgkmcnt(0)
; v31 = s4 + 4*(v0 + v22): source address for row 2.
v_add_i32_e32 v31, vcc, v0, v22
v_lshlrev_b32_e32 v31, 2, v31
v_add_i32_e32 v31, vcc, s4, v31
; Row 2: [v18 + 512] += [v31 + 128]
ds_read_b32 v32, v31 offset:128
ds_read_b32 v33, v18 offset:512
; vcc = (v1 > 1), written as 1 < v1 (signed)
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v18, v32 offset:512
s_waitcnt lgkmcnt(0)
; v32 is the "continue to the global update" flag; default 0.
v_mov_b32_e32 v32, 0
; "Then" side: lanes with v1 > 1.
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_186
BB4_185:
; v32 = -1 only for v1 == 2.
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v32, 0, -1, vcc
BB4_186: ; %Flow1138
; Switch exec to the "else" side: lanes with v1 <= 1.
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_188
s_cbranch_execz BB4_188
BB4_187: ; %.thread85.i491
s_mov_b32 m0, -1
; Row 0: [v18] += [v18 + 64]
ds_read_b32 v32, v18 offset:64
ds_read_b32 v33, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v18, v32
; Row 1: [v18 + 256] += [v30 + 64]
ds_read_b32 v30, v30 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v32, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v32
ds_write_b32 v18, v30 offset:256
s_waitcnt lgkmcnt(0)
; Row 2: [v18 + 512] += [v31 + 64]
ds_read_b32 v30, v31 offset:64
ds_read_b32 v31, v18 offset:512
; Lanes on this side always continue to the global update.
v_mov_b32_e32 v32, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v31
ds_write_b32 v18, v30 offset:512
s_waitcnt lgkmcnt(0)
BB4_188: ; %Flow1139
; Join of the if/else above; continue only where the flag v32 is non-zero.
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v32
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_193
s_cbranch_execz BB4_193
BB4_189:
v_or_b32_e32 v30, 8, v14
v_add_i32_e32 v30, vcc, v30, v2
v_mul_lo_i32 v31, v30, 3
v_mov_b32_e32 v30, 0xe0
v_mad_i32_i24 v30, v30, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v32, v30
ds_read_b32 v30, v30 offset:32
v_add_i32_e32 v31, vcc, v1, v31
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_mov_b64 s[16:17], s[12:13]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v32, v30
v_ashrrev_i32_e32 v32, 31, v31
v_lshl_b64 v[33:34], v[31:32], 2
v_add_i32_e32 v31, vcc, s12, v33
v_mov_b32_e32 v32, s13
v_addc_u32_e32 v32, vcc, v34, v32, vcc
buffer_load_dword v34, v[33:34], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_190: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v33, -1
v_add_f32_e32 v33, v30, v34
v_mov_b32_e32 v36, v34
v_mov_b32_e32 v35, v33
buffer_atomic_cmpswap v[35:36], v[31:32], s[16:19], 0 addr64 glc
v_mov_b32_e32 v33, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v35, v34
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v34, v35
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_190
; BB#191: ; %atomicAdd_g_f.exit.i479
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v31
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_193
; BB#192:
v_add_f32_e32 v3, v30, v3
BB4_193: ; %Flow1140
s_or_b64 exec, exec, s[10:11]
BB4_194: ; %reduce_force_i_pow2.exit493
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v27
ds_write_b32 v6, v28
ds_write_b32 v7, v29
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_205
s_cbranch_execz BB4_205
BB4_195:
s_mov_b32 m0, -1
ds_read_b32 v27, v18 offset:128
ds_read_b32 v28, v18
v_add_i32_e32 v29, vcc, v0, v26
v_lshlrev_b32_e32 v29, 2, v29
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v28
ds_write_b32 v18, v27
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v27, vcc, s4, v29
ds_read_b32 v28, v27 offset:128
ds_read_b32 v29, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v28, v28, v29
ds_write_b32 v18, v28 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v28, vcc, v0, v22
v_lshlrev_b32_e32 v28, 2, v28
v_add_i32_e32 v28, vcc, s4, v28
ds_read_b32 v29, v28 offset:128
ds_read_b32 v30, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v29, v29, v30
ds_write_b32 v18, v29 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v29, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_197
BB4_196:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v29, 0, -1, vcc
BB4_197: ; %Flow1135
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_199
s_cbranch_execz BB4_199
BB4_198: ; %.thread85.i442
s_mov_b32 m0, -1
ds_read_b32 v29, v18 offset:64
ds_read_b32 v30, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v29, v29, v30
ds_write_b32 v18, v29
ds_read_b32 v27, v27 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v29, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v29
ds_write_b32 v18, v27 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v27, v28 offset:64
ds_read_b32 v28, v18 offset:512
v_mov_b32_e32 v29, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v28
ds_write_b32 v18, v27 offset:512
s_waitcnt lgkmcnt(0)
BB4_199: ; %Flow1136
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v29
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_204
s_cbranch_execz BB4_204
BB4_200:
v_or_b32_e32 v27, 16, v14
v_add_i32_e32 v27, vcc, v27, v2
v_mul_lo_i32 v28, v27, 3
v_mov_b32_e32 v27, 0xe0
v_mad_i32_i24 v27, v27, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v29, v27
ds_read_b32 v27, v27 offset:32
v_add_i32_e32 v28, vcc, v1, v28
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_mov_b64 s[16:17], s[12:13]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v29, v27
v_ashrrev_i32_e32 v29, 31, v28
v_lshl_b64 v[30:31], v[28:29], 2
v_add_i32_e32 v28, vcc, s12, v30
v_mov_b32_e32 v29, s13
v_addc_u32_e32 v29, vcc, v31, v29, vcc
buffer_load_dword v31, v[30:31], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_201: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v30, -1
v_add_f32_e32 v30, v27, v31
v_mov_b32_e32 v33, v31
v_mov_b32_e32 v32, v30
buffer_atomic_cmpswap v[32:33], v[28:29], s[16:19], 0 addr64 glc
v_mov_b32_e32 v30, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v32, v31
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v31, v32
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_201
; BB#202: ; %atomicAdd_g_f.exit.i430
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v28, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v28
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_204
; BB#203:
v_add_f32_e32 v3, v27, v3
BB4_204: ; %Flow1137
s_or_b64 exec, exec, s[10:11]
BB4_205: ; %reduce_force_i_pow2.exit444
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v23
ds_write_b32 v6, v24
ds_write_b32 v7, v25
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_216
s_cbranch_execz BB4_216
BB4_206:
s_mov_b32 m0, -1
ds_read_b32 v23, v18 offset:128
ds_read_b32 v24, v18
v_add_i32_e32 v25, vcc, v0, v26
v_lshlrev_b32_e32 v25, 2, v25
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v24
ds_write_b32 v18, v23
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v23, vcc, s4, v25
ds_read_b32 v24, v23 offset:128
ds_read_b32 v25, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v24, v24, v25
ds_write_b32 v18, v24 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v24, vcc, v0, v22
v_lshlrev_b32_e32 v24, 2, v24
v_add_i32_e32 v24, vcc, s4, v24
ds_read_b32 v25, v24 offset:128
ds_read_b32 v27, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v25, v25, v27
ds_write_b32 v18, v25 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v25, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_208
BB4_207:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v25, 0, -1, vcc
BB4_208: ; %Flow1132
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_210
s_cbranch_execz BB4_210
BB4_209: ; %.thread85.i393
s_mov_b32 m0, -1
ds_read_b32 v25, v18 offset:64
ds_read_b32 v27, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v25, v25, v27
ds_write_b32 v18, v25
ds_read_b32 v23, v23 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v25, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v25
ds_write_b32 v18, v23 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v23, v24 offset:64
ds_read_b32 v24, v18 offset:512
v_mov_b32_e32 v25, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v24
ds_write_b32 v18, v23 offset:512
s_waitcnt lgkmcnt(0)
BB4_210: ; %Flow1133
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v25
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_215
s_cbranch_execz BB4_215
BB4_211:
v_or_b32_e32 v23, 24, v14
v_add_i32_e32 v23, vcc, v23, v2
v_mul_lo_i32 v24, v23, 3
v_mov_b32_e32 v23, 0xe0
v_mad_i32_i24 v23, v23, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v25, v23
ds_read_b32 v23, v23 offset:32
v_add_i32_e32 v24, vcc, v1, v24
v_mov_b32_e32 v28, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v25, v23
v_ashrrev_i32_e32 v25, 31, v24
v_lshl_b64 v[24:25], v[24:25], 2
v_add_i32_e32 v27, vcc, s12, v24
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v25, v28, vcc
buffer_load_dword v25, v[24:25], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_212: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v24, -1
v_add_f32_e32 v24, v23, v25
v_mov_b32_e32 v30, v25
v_mov_b32_e32 v29, v24
buffer_atomic_cmpswap v[29:30], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v24, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v29, v25
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v25, v29
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_212
; BB#213: ; %atomicAdd_g_f.exit.i381
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v24, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v24
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_215
; BB#214:
v_add_f32_e32 v3, v23, v3
BB4_215: ; %Flow1134
s_or_b64 exec, exec, s[10:11]
BB4_216: ; %reduce_force_i_pow2.exit395
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v19
ds_write_b32 v6, v20
ds_write_b32 v7, v21
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_227
s_cbranch_execz BB4_227
BB4_217:
s_mov_b32 m0, -1
ds_read_b32 v19, v18 offset:128
ds_read_b32 v20, v18
v_add_i32_e32 v21, vcc, v0, v26
v_lshlrev_b32_e32 v21, 2, v21
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v20
ds_write_b32 v18, v19
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v19, vcc, s4, v21
ds_read_b32 v20, v19 offset:128
ds_read_b32 v21, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v20, v20, v21
ds_write_b32 v18, v20 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v20, vcc, v0, v22
v_lshlrev_b32_e32 v20, 2, v20
v_add_i32_e32 v20, vcc, s4, v20
ds_read_b32 v21, v20 offset:128
ds_read_b32 v23, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v21, v21, v23
ds_write_b32 v18, v21 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v21, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_219
BB4_218:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v21, 0, -1, vcc
BB4_219: ; %Flow1129
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_221
s_cbranch_execz BB4_221
BB4_220: ; %.thread85.i344
s_mov_b32 m0, -1
ds_read_b32 v21, v18 offset:64
ds_read_b32 v23, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v21, v21, v23
ds_write_b32 v18, v21
ds_read_b32 v19, v19 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v21, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v21
ds_write_b32 v18, v19 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v19, v20 offset:64
ds_read_b32 v20, v18 offset:512
v_mov_b32_e32 v21, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v20
ds_write_b32 v18, v19 offset:512
s_waitcnt lgkmcnt(0)
BB4_221: ; %Flow1130
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v21
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_226
s_cbranch_execz BB4_226
BB4_222:
v_or_b32_e32 v19, 32, v14
v_add_i32_e32 v19, vcc, v19, v2
v_mul_lo_i32 v20, v19, 3
v_mov_b32_e32 v19, 0xe0
v_mad_i32_i24 v19, v19, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v21, v19
ds_read_b32 v19, v19 offset:32
v_add_i32_e32 v20, vcc, v1, v20
v_mov_b32_e32 v23, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v21, v19
v_ashrrev_i32_e32 v21, 31, v20
v_lshl_b64 v[20:21], v[20:21], 2
v_add_i32_e32 v27, vcc, s12, v20
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v21, v23, vcc
buffer_load_dword v21, v[20:21], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_223: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v20, -1
v_add_f32_e32 v20, v19, v21
v_mov_b32_e32 v24, v21
v_mov_b32_e32 v23, v20
buffer_atomic_cmpswap v[23:24], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v20, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v23, v21
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v21, v23
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_223
; BB#224: ; %atomicAdd_g_f.exit.i332
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v20, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v20
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_226
; BB#225:
v_add_f32_e32 v3, v19, v3
BB4_226: ; %Flow1131
s_or_b64 exec, exec, s[10:11]
BB4_227: ; %reduce_force_i_pow2.exit346
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v15
ds_write_b32 v6, v16
ds_write_b32 v7, v17
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_238
s_cbranch_execz BB4_238
BB4_228:
s_mov_b32 m0, -1
ds_read_b32 v15, v18 offset:128
ds_read_b32 v16, v18
v_add_i32_e32 v17, vcc, v0, v26
v_lshlrev_b32_e32 v17, 2, v17
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v15, v16
ds_write_b32 v18, v15
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v15, vcc, s4, v17
ds_read_b32 v16, v15 offset:128
ds_read_b32 v17, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v16, v16, v17
ds_write_b32 v18, v16 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v16, vcc, v0, v22
v_lshlrev_b32_e32 v16, 2, v16
v_add_i32_e32 v16, vcc, s4, v16
ds_read_b32 v17, v16 offset:128
ds_read_b32 v19, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v17, v17, v19
ds_write_b32 v18, v17 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v17, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_230
BB4_229:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v17, 0, -1, vcc
BB4_230: ; %Flow1126
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_232
s_cbranch_execz BB4_232
BB4_231: ; %.thread85.i295
s_mov_b32 m0, -1
ds_read_b32 v17, v18 offset:64
ds_read_b32 v19, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v17, v17, v19
ds_write_b32 v18, v17
ds_read_b32 v15, v15 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v17, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v15, v17
ds_write_b32 v18, v15 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v15, v16 offset:64
ds_read_b32 v16, v18 offset:512
v_mov_b32_e32 v17, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v15, v16
ds_write_b32 v18, v15 offset:512
s_waitcnt lgkmcnt(0)
BB4_232: ; %Flow1127
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v17
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_237
s_cbranch_execz BB4_237
BB4_233:
v_or_b32_e32 v15, 40, v14
v_add_i32_e32 v15, vcc, v15, v2
v_mul_lo_i32 v16, v15, 3
v_mov_b32_e32 v15, 0xe0
v_mad_i32_i24 v15, v15, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v17, v15
ds_read_b32 v15, v15 offset:32
v_add_i32_e32 v16, vcc, v1, v16
v_mov_b32_e32 v19, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v17, v15
v_ashrrev_i32_e32 v17, 31, v16
v_lshl_b64 v[16:17], v[16:17], 2
v_add_i32_e32 v27, vcc, s12, v16
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v17, v19, vcc
buffer_load_dword v17, v[16:17], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_234: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v16, -1
v_add_f32_e32 v16, v15, v17
v_mov_b32_e32 v20, v17
v_mov_b32_e32 v19, v16
buffer_atomic_cmpswap v[19:20], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v16, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v19, v17
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v17, v19
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_234
; BB#235: ; %atomicAdd_g_f.exit.i283
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v16, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v16
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_237
; BB#236:
v_add_f32_e32 v3, v15, v3
BB4_237: ; %Flow1128
s_or_b64 exec, exec, s[10:11]
BB4_238: ; %reduce_force_i_pow2.exit297
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v11
ds_write_b32 v6, v12
ds_write_b32 v7, v13
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB4_249
s_cbranch_execz BB4_249
BB4_239:
s_mov_b32 m0, -1
ds_read_b32 v11, v18 offset:128
ds_read_b32 v12, v18
v_add_i32_e32 v13, vcc, v0, v26
v_lshlrev_b32_e32 v13, 2, v13
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v12
ds_write_b32 v18, v11
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v11, vcc, s4, v13
ds_read_b32 v12, v11 offset:128
ds_read_b32 v13, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v12, v12, v13
ds_write_b32 v18, v12 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v12, vcc, v0, v22
v_lshlrev_b32_e32 v12, 2, v12
v_add_i32_e32 v12, vcc, s4, v12
ds_read_b32 v13, v12 offset:128
ds_read_b32 v15, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v15
ds_write_b32 v18, v13 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v13, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_241
BB4_240:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v13, 0, -1, vcc
BB4_241: ; %Flow1123
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB4_243
s_cbranch_execz BB4_243
BB4_242: ; %.thread85.i246
s_mov_b32 m0, -1
ds_read_b32 v13, v18 offset:64
ds_read_b32 v15, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v15
ds_write_b32 v18, v13
ds_read_b32 v11, v11 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v13, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v13
ds_write_b32 v18, v11 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v11, v12 offset:64
ds_read_b32 v12, v18 offset:512
v_mov_b32_e32 v13, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v12
ds_write_b32 v18, v11 offset:512
s_waitcnt lgkmcnt(0)
BB4_243: ; %Flow1124
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v13
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB4_248
s_cbranch_execz BB4_248
BB4_244:
v_or_b32_e32 v11, 48, v14
v_add_i32_e32 v11, vcc, v11, v2
v_mul_lo_i32 v12, v11, 3
v_mov_b32_e32 v11, 0xe0
v_mad_i32_i24 v11, v11, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v13, v11
ds_read_b32 v11, v11 offset:32
v_add_i32_e32 v12, vcc, v1, v12
v_mov_b32_e32 v15, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v13, v11
v_ashrrev_i32_e32 v13, 31, v12
v_lshl_b64 v[12:13], v[12:13], 2
v_add_i32_e32 v27, vcc, s12, v12
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v13, v15, vcc
buffer_load_dword v13, v[12:13], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB4_245: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v12, -1
v_add_f32_e32 v12, v11, v13
v_mov_b32_e32 v16, v13
v_mov_b32_e32 v15, v12
buffer_atomic_cmpswap v[15:16], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v12, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v15, v13
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v13, v15
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB4_245
; BB#246: ; %atomicAdd_g_f.exit.i234
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v12, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v12
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_248
; BB#247:
v_add_f32_e32 v3, v11, v3
BB4_248: ; %Flow1125
s_or_b64 exec, exec, s[10:11]
BB4_249: ; %reduce_force_i_pow2.exit248
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v8
ds_write_b32 v6, v9
ds_write_b32 v7, v10
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[0:1], exec, s[6:7]
; mask branch BB4_260
s_cbranch_execz BB4_260
BB4_250:
s_mov_b32 m0, -1
ds_read_b32 v5, v18 offset:128
ds_read_b32 v6, v18
v_add_i32_e32 v7, vcc, v0, v26
v_lshlrev_b32_e32 v7, 2, v7
v_add_i32_e32 v0, vcc, v0, v22
v_lshlrev_b32_e32 v0, 2, v0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v5, v5, v6
ds_write_b32 v18, v5
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v5, vcc, s4, v7
ds_read_b32 v6, v5 offset:128
ds_read_b32 v7, v18 offset:256
v_add_i32_e32 v0, vcc, s4, v0
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v6, v6, v7
ds_write_b32 v18, v6 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v6, v0 offset:128
ds_read_b32 v7, v18 offset:512
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v6, v6, v7
ds_write_b32 v18, v6 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v6, 0
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB4_252
BB4_251:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v6, 0, -1, vcc
BB4_252: ; %Flow1120
s_or_saveexec_b64 s[4:5], s[4:5]
s_xor_b64 exec, exec, s[4:5]
; mask branch BB4_254
s_cbranch_execz BB4_254
BB4_253: ; %.thread85.i197
s_mov_b32 m0, -1
ds_read_b32 v6, v18 offset:64
ds_read_b32 v7, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v6, v6, v7
ds_write_b32 v18, v6
ds_read_b32 v5, v5 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v6, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v5, v5, v6
ds_write_b32 v18, v5 offset:256
ds_read_b32 v0, v0 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v5, v18 offset:512
v_mov_b32_e32 v6, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v0, v0, v5
ds_write_b32 v18, v0 offset:512
s_waitcnt lgkmcnt(0)
BB4_254: ; %Flow1121
s_or_b64 exec, exec, s[4:5]
v_cmp_ne_u32_e32 vcc, 0, v6
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB4_259
s_cbranch_execz BB4_259
BB4_255:
v_or_b32_e32 v0, 56, v14
v_add_i32_e32 v0, vcc, v0, v2
v_mul_lo_i32 v2, v0, 3
v_mov_b32_e32 v0, 0xe0
v_mad_i32_i24 v0, v0, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v5, v0
ds_read_b32 v0, v0 offset:32
s_mov_b32 s15, 0xf000
s_mov_b32 s14, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v0, v5, v0
v_add_i32_e32 v5, vcc, v1, v2
v_ashrrev_i32_e32 v6, 31, v5
v_lshl_b64 v[7:8], v[5:6], 2
v_add_i32_e32 v5, vcc, s12, v7
v_mov_b32_e32 v2, s13
v_addc_u32_e32 v6, vcc, v8, v2, vcc
buffer_load_dword v8, v[7:8], s[12:15], 0 addr64
s_mov_b64 s[12:13], 0
s_mov_b64 s[6:7], s[12:13]
s_waitcnt vmcnt(0)
BB4_256: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v7, v0, v8
v_mov_b32_e32 v10, v8
v_mov_b32_e32 v9, v7
buffer_atomic_cmpswap v[9:10], v[5:6], s[12:15], 0 addr64 glc
v_mov_b32_e32 v2, -1
v_mov_b32_e32 v2, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v9, v8
s_or_b64 s[6:7], vcc, s[6:7]
v_mov_b32_e32 v8, v9
s_andn2_b64 exec, exec, s[6:7]
s_cbranch_execnz BB4_256
; BB#257: ; %atomicAdd_g_f.exit.i185
s_or_b64 exec, exec, s[6:7]
s_and_b64 s[6:7], exec, s[2:3]
v_cndmask_b32_e64 v2, 0, 1, s[6:7]
v_cmp_ne_u32_e32 vcc, 1, v2
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB4_259
; BB#258:
v_add_f32_e32 v3, v0, v3
BB4_259: ; %Flow1122
s_or_b64 exec, exec, s[4:5]
BB4_260: ; %reduce_force_i_pow2.exit199
s_or_b64 exec, exec, s[0:1]
s_barrier
v_cmp_gt_u32_e32 vcc, 3, v1
s_and_b64 s[0:1], exec, s[2:3]
s_and_b64 s[0:1], vcc, s[0:1]
s_and_saveexec_b64 s[2:3], s[0:1]
s_xor_b64 s[0:1], exec, s[2:3]
; mask branch BB4_264
s_cbranch_execz BB4_264
BB4_261:
v_add_i32_e32 v0, vcc, v4, v1
v_mov_b32_e32 v1, 0
v_lshl_b64 v[0:1], v[0:1], 2
v_add_i32_e32 v4, vcc, s8, v0
v_mov_b32_e32 v2, s9
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
v_addc_u32_e32 v5, vcc, v1, v2, vcc
buffer_load_dword v1, v[0:1], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[2:3], s[8:9]
s_waitcnt vmcnt(0)
BB4_262: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v0, -1
v_add_f32_e32 v0, v3, v1
v_mov_b32_e32 v7, v1
v_mov_b32_e32 v6, v0
buffer_atomic_cmpswap v[6:7], v[4:5], s[8:11], 0 addr64 glc
v_mov_b32_e32 v0, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v6, v1
s_or_b64 s[2:3], vcc, s[2:3]
v_mov_b32_e32 v1, v6
s_andn2_b64 exec, exec, s[2:3]
s_cbranch_execnz BB4_262
; BB#263: ; %Flow
s_or_b64 exec, exec, s[2:3]
BB4_264: ; %Flow1119
s_or_b64 exec, exec, s[0:1]
s_endpgm
.Lfunc_end4:
.size nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl, .Lfunc_end4-nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl
.section .AMDGPU.csdata
; Resource summary for the kernel whose .size directive immediately precedes
; this section (nbnxn_kernel_ElecEw_VdwLJCombGeom_F_opencl). Everything in
; this section is an assembler comment; it carries no encoded data.
; Kernel info:
; codeLenInByte = 20740
; NumSgprs: 42
; NumVgprs: 83
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 5
; VGPRBlocks: 20
; NumSGPRsForWavesPerEU: 42
; NumVGPRsForWavesPerEU: 83
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 1
.section .AMDGPU.config
; The ten words below read as five (key, value) pairs.
; NOTE(review): the register names given for the keys are taken from the
; LLVM AMDGPU backend / AMD register documentation, not from this listing;
; confirm against those before relying on them.
;
; Pair 1: key 47176 = 0xB848 (presumably COMPUTE_PGM_RSRC1).
; Value 11272532 = 0xAC0154 decodes consistently with the summary above:
;   bits  5:0  = 20   -> matches "VGPRBlocks: 20"
;   bits  9:6  = 5    -> matches "SGPRBlocks: 5"
;   bits 19:12 = 192  -> matches "FloatMode: 192"
;   bit  23    = 1    -> matches "IeeeMode: 1"
.long 47176
.long 11272532
; Pair 2: key 47180 = 0xB84C (presumably COMPUTE_PGM_RSRC2).
; Value 2192 = 0x890 decodes consistently with the summary above:
;   bits  5:1  = 8    -> matches "USER_SGPR: 8"
;   bit   7    = 1    -> matches "TGID_X_EN: 1"
;   bits  9:8  = 0    -> matches "TGID_Y_EN: 0" / "TGID_Z_EN: 0"
;   bits 12:11 = 1    -> matches "TIDIG_COMP_CNT: 1"
.long 47180
.long 2192
; Pair 3: key 47200 = 0xB860 (presumably the scratch/tmpring size register).
; The value 0 agrees with "ScratchSize: 0" above.
.long 47200
.long 0
; Pairs 4 and 5: keys 4 and 8, both with value 0.
; NOTE(review): these look like the backend's spilled-SGPR / spilled-VGPR
; counters (no spills) -- confirm against the LLVM version that produced
; this output.
.long 4
.long 0
.long 8
.long 0
.text
.globl nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl
.p2align 8
.type nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl,@function
.amdgpu_hsa_kernel nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl
nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl: ; @nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
granulated_workitem_vgpr_count = 20
granulated_wavefront_sgpr_count = 5
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
user_sgpr_count = 8
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
enable_vgpr_workitem_id = 1
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 232
workgroup_fbarrier_count = 0
wavefront_sgpr_count = 42
workitem_vgpr_count = 84
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
s_load_dwordx2 s[0:1], s[6:7], 0x2c
s_mov_b32 s9, 0
s_lshl_b64 s[10:11], s[8:9], 4
v_mov_b32_e32 v3, s10
s_mov_b32 s2, s9
s_mov_b32 s3, 0xf000
v_mov_b32_e32 v4, s11
s_waitcnt lgkmcnt(0)
buffer_load_dwordx4 v[31:34], v[3:4], s[0:3], 0 addr64
v_mov_b32_e32 v2, v0
s_load_dwordx2 s[8:9], s[6:7], 0x24
s_load_dwordx2 s[16:17], s[6:7], 0x18
s_mov_b64 s[18:19], s[2:3]
s_mov_b64 s[10:11], s[2:3]
s_load_dword s14, s[6:7], 0x33
s_load_dword s0, s[6:7], 0x2
s_load_dwordx2 s[20:21], s[6:7], 0x22
s_mov_b32 m0, -1
s_mov_b64 s[22:23], s[2:3]
s_load_dword s1, s[4:5], 0x1
s_waitcnt lgkmcnt(0)
s_add_i32 s15, s14, 0x420
s_waitcnt vmcnt(0)
v_lshlrev_b32_e32 v40, 3, v31
v_mul_lo_i32 v4, v32, 3
v_add_i32_e32 v0, vcc, v1, v40
v_lshlrev_b32_e32 v0, 3, v0
v_add_i32_e32 v9, vcc, v2, v0
v_ashrrev_i32_e32 v10, 31, v9
v_ashrrev_i32_e32 v5, 31, v4
v_lshl_b64 v[11:12], v[4:5], 2
v_lshl_b64 v[6:7], v[9:10], 4
buffer_load_dwordx4 v[5:8], v[6:7], s[16:19], 0 addr64
buffer_load_dwordx2 v[13:14], v[11:12], s[8:11], 0 addr64
buffer_load_dword v0, v[11:12], s[8:11], 0 addr64 offset:8
s_waitcnt vmcnt(1)
v_add_f32_e32 v11, v5, v13
s_waitcnt vmcnt(0)
v_add_f32_e32 v5, v7, v0
v_lshlrev_b32_e32 v0, 3, v1
v_add_i32_e32 v39, vcc, v2, v0
v_lshlrev_b32_e32 v3, 4, v39
v_add_f32_e32 v12, v6, v14
v_mul_f32_e32 v6, s0, v8
v_add_i32_e32 v3, vcc, s14, v3
ds_write2_b64 v3, v[11:12], v[5:6] offset1:1
s_waitcnt lgkmcnt(0)
v_lshl_b64 v[5:6], v[9:10], 3
buffer_load_dwordx2 v[5:6], v[5:6], s[20:23], 0 addr64
s_and_b32 s0, s1, 0xffff
v_mad_u32_u24 v52, s0, v1, v2
v_lshlrev_b32_e32 v7, 3, v39
v_add_i32_e32 v7, vcc, s15, v7
v_or_b32_e32 v3, 32, v52
v_lshrrev_b32_e32 v41, 5, v52
v_cmp_eq_u32_e32 vcc, 32, v3
s_waitcnt vmcnt(0)
ds_write_b64 v7, v[5:6]
s_and_saveexec_b64 s[0:1], vcc
s_xor_b64 s[0:1], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; mask branch BB5_2
BB5_1:
v_lshlrev_b32_e32 v3, 2, v41
v_add_i32_e32 v3, vcc, s14, v3
v_mov_b32_e32 v5, 0
s_mov_b32 m0, -1
ds_write_b32 v3, v5 offset:2336
s_waitcnt lgkmcnt(0)
BB5_2: ; %.preheader456605
s_or_b64 exec, exec, s[0:1]
s_barrier
s_load_dwordx2 s[12:13], s[6:7], 0x1a
v_cmp_lt_i32_e32 vcc, v33, v34
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v12, -1
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB5_4
; BB#3: ; %.preheader456605.._crit_edge_crit_edge
v_mov_b32_e32 v43, 0
v_lshlrev_b32_e32 v3, 2, v52
v_mov_b32_e32 v44, v43
v_mov_b32_e32 v45, v43
v_add_i32_e32 v3, vcc, s14, v3
v_mov_b32_e32 v8, v43
v_add_i32_e32 v5, vcc, 0x620, v3
v_add_i32_e32 v6, vcc, 0x720, v3
v_add_i32_e32 v7, vcc, 0x820, v3
v_mov_b32_e32 v12, 0
v_mov_b32_e32 v9, v44
v_mov_b32_e32 v10, v45
v_mov_b32_e32 v11, v46
s_branch BB5_5
BB5_4:
; implicit-def: %VGPR43_VGPR44_VGPR45_VGPR46
; implicit-def: %VGPR5
; implicit-def: %VGPR6
; implicit-def: %VGPR7
; implicit-def: %VGPR8_VGPR9_VGPR10_VGPR11
BB5_5: ; %Flow1190
s_load_dwordx2 s[8:9], s[6:7], 0x20
v_cmp_ne_u32_e32 vcc, 0, v12
v_cndmask_b32_e64 v11, 0, 1, vcc
v_cmp_ne_u32_e32 vcc, 1, v11
v_mov_b32_e32 v35, v43
v_mov_b32_e32 v27, v43
v_mov_b32_e32 v23, v43
v_mov_b32_e32 v19, v43
v_mov_b32_e32 v15, v43
v_mov_b32_e32 v11, v43
s_movk_i32 s18, 0x620
v_mov_b32_e32 v3, 0
s_add_i32 s4, s14, s18
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v36, v44
v_mov_b32_e32 v37, v45
v_mov_b32_e32 v38, v46
v_mov_b32_e32 v28, v44
v_mov_b32_e32 v29, v45
v_mov_b32_e32 v30, v46
v_mov_b32_e32 v24, v44
v_mov_b32_e32 v25, v45
v_mov_b32_e32 v26, v46
v_mov_b32_e32 v20, v44
v_mov_b32_e32 v21, v45
v_mov_b32_e32 v22, v46
v_mov_b32_e32 v16, v44
v_mov_b32_e32 v17, v45
v_mov_b32_e32 v18, v46
v_mov_b32_e32 v12, v44
v_mov_b32_e32 v13, v45
v_mov_b32_e32 v14, v46
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB5_172
; BB#6: ; %.lr.ph
; Preheader of loop BB5_7: builds per-lane masks and LDS addresses, loads
; scalar parameters through s[6:7], and zeroes the accumulator registers.
; s[32:33] = (v2 < 4, unsigned) AND ((v1 | 4) == 4).
v_or_b32_e32 v5, 4, v1
v_cmp_eq_u32_e32 vcc, 4, v5
v_cmp_gt_u32_e64 s[0:1], 4, v2
s_and_b64 s[32:33], s[0:1], vcc
; v50 = (s14 + 0x400) + 4*(v1 + v2).
v_add_i32_e32 v5, vcc, v1, v2
v_and_b32_e32 v8, 4, v1
s_add_i32 s22, s14, 0x400
v_lshlrev_b32_e32 v5, 2, v5
v_lshlrev_b32_e32 v8, 2, v8
v_add_i32_e32 v50, vcc, s22, v5
; The incoming v52 is consumed here (v5 = v52 << 2, v48 = v52 & 31) before
; v52 is overwritten with (s14 + 0x400) + 4*(v1 & 4).
v_lshlrev_b32_e32 v5, 2, v52
v_and_b32_e32 v48, 31, v52
v_add_i32_e32 v52, vcc, s22, v8
; v53 = s14 + 16*v2 and v54 = s15 + 8*v2; the scalar loads are interleaved
; with this address arithmetic.
v_lshlrev_b32_e32 v8, 4, v2
s_load_dword s19, s[6:7], 0x5
v_add_i32_e32 v53, vcc, s14, v8
v_lshlrev_b32_e32 v8, 3, v2
v_add_i32_e32 v54, vcc, s15, v8
; v8..v10 = 0 are the source of every zero-initialised register triple below.
v_mov_b32_e32 v8, 0
s_load_dwordx2 s[10:11], s[6:7], 0x30
s_load_dword s5, s[6:7], 0x9
s_load_dword s24, s[6:7], 0xa
s_load_dwordx2 s[26:27], s[6:7], 0x2e
v_mov_b32_e32 v9, v8
v_mov_b32_e32 v10, v8
; NOTE(review): v14 receives the value v11 held on entry to this block;
; v11..v13 are then cleared to 0.
v_mov_b32_e32 v14, v11
v_mov_b32_e32 v13, v10
v_mov_b32_e32 v12, v9
v_mov_b32_e32 v11, v8
; v7 = s14 + 4*(incoming v52); it is the base for v5/v6/v7 further down.
v_add_i32_e32 v7, vcc, s14, v5
v_mov_b32_e32 v18, v11
v_mov_b32_e32 v22, v11
v_mov_b32_e32 v26, v11
v_mov_b32_e32 v30, v11
v_mov_b32_e32 v38, v11
v_mov_b32_e32 v46, v11
; Wait for the scalar loads, then precompute v47 = s19*s19 and v51 = s19*v47.
s_waitcnt lgkmcnt(0)
v_mul_f32_e64 v47, s19, s19
v_mov_b32_e32 v42, 0
s_mov_b32 s30, 0
v_mov_b32_e32 v49, v42
; s[0:1] = (v1 > v2, unsigned); s[2:3] = (v32 != 22).
v_cmp_gt_u32_e64 s[0:1], v1, v2
v_cmp_ne_u32_e64 s[2:3], 22, v32
v_mul_f32_e32 v51, s19, v47
; v5 = s18 + v7, v6 = v7 + 0x720, v7 = v7 + 0x820 (used as ds_write addresses
; later in the loop body).
v_add_i32_e32 v5, vcc, s18, v7
v_add_i32_e32 v6, vcc, 0x720, v7
v_add_i32_e32 v7, vcc, 0x820, v7
; s[28:31] = {0, 0, 0, 0xf000}: zero-based resource words used by the addr64
; buffer accesses in the loop.
s_mov_b32 s31, 0xf000
s_mov_b64 s[28:29], 0
; v[55:56] = sign-extended copy of v33; v33 is then reused for v40 | 7.
v_ashrrev_i32_e32 v56, 31, v33
v_mov_b32_e32 v55, v33
; v33, v57..v62 = v40 | 7 .. v40 | 1.
v_or_b32_e32 v33, 7, v40
v_or_b32_e32 v57, 6, v40
v_or_b32_e32 v58, 5, v40
v_or_b32_e32 v59, 4, v40
v_or_b32_e32 v60, 3, v40
v_or_b32_e32 v61, 2, v40
v_or_b32_e32 v62, 1, v40
; Clear the remaining triples v15..v17, v19..v21, v23..v25, v27..v29,
; v35..v37 and v43..v45 from v8..v10.
v_mov_b32_e32 v17, v10
v_mov_b32_e32 v16, v9
v_mov_b32_e32 v15, v8
v_mov_b32_e32 v21, v10
v_mov_b32_e32 v20, v9
v_mov_b32_e32 v19, v8
v_mov_b32_e32 v25, v10
v_mov_b32_e32 v24, v9
v_mov_b32_e32 v23, v8
v_mov_b32_e32 v29, v10
v_mov_b32_e32 v28, v9
v_mov_b32_e32 v27, v8
v_mov_b32_e32 v37, v10
v_mov_b32_e32 v36, v9
v_mov_b32_e32 v35, v8
v_mov_b32_e32 v45, v10
v_mov_b32_e32 v44, v9
v_mov_b32_e32 v43, v8
BB5_7: ; =>This Loop Header: Depth=1
; Child Loop BB5_47 Depth 2
; Child Loop BB5_87 Depth 2
; Child Loop BB5_127 Depth 2
; Child Loop BB5_167 Depth 2
; Loop header: fetches two dwords into v[63:64] from the 64-bit address
; s[26:27] + (v[55:56] << 5) + (v[41:42] << 3), plus the instruction offset 16.
; v14 is used as a scratch copy of s27 for the carry add.
v_lshl_b64 v[63:64], v[55:56], 5
v_add_i32_e32 v65, vcc, s26, v63
v_mov_b32_e32 v14, s27
v_addc_u32_e32 v64, vcc, v64, v14, vcc
v_lshl_b64 v[66:67], v[41:42], 3
v_add_i32_e32 v65, vcc, v65, v66
v_addc_u32_e32 v66, vcc, v64, v67, vcc
buffer_load_dwordx2 v[63:64], v[65:66], s[28:31], 0 addr64 offset:16
s_waitcnt vmcnt(0)
; Keep only lanes whose first loaded dword (v63) is non-zero; s[34:35] holds
; the lanes switched off here. Branch to BB5_171 when no lane remains.
v_cmp_ne_u32_e32 vcc, 0, v63
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[34:35], exec, s[14:15]
; mask branch BB5_171
s_cbranch_execz BB5_171
BB5_8: ; in Loop: Header=BB5_7 Depth=1
; Loads one dword into v14 from s[10:11] + (sext(v64) << 7) + (v[48:49] << 2).
; v64 is the second dword fetched in the loop header.
v_ashrrev_i32_e32 v66, 31, v64
v_mov_b32_e32 v65, v64
v_lshl_b64 v[64:65], v[65:66], 7
v_add_i32_e32 v66, vcc, s10, v64
v_mov_b32_e32 v14, s11
v_addc_u32_e32 v65, vcc, v65, v14, vcc
v_lshl_b64 v[67:68], v[48:49], 2
v_add_i32_e32 v66, vcc, v66, v67
v_addc_u32_e32 v67, vcc, v65, v68, vcc
buffer_load_dword v14, v[66:67], s[28:31], 0 addr64
; Restrict execution to the lanes in s[32:33]; s[14:15] keeps the lanes that
; were switched off so BB5_10 can re-enable them.
s_and_saveexec_b64 s[14:15], s[32:33]
s_xor_b64 s[14:15], exec, s[14:15]
s_waitcnt vmcnt(0)
; mask branch BB5_10
s_cbranch_execz BB5_10
BB5_9: ; in Loop: Header=BB5_7 Depth=1
; Lanes selected by s[32:33] copy one dword from global memory at
; s[26:27] + (v[55:56] << 5) + (v[2:3] << 2) into LDS at address v50.
; v18 serves as scratch (high half of the base, then the loaded value).
v_lshl_b64 v[64:65], v[55:56], 5
v_add_i32_e32 v66, vcc, s26, v64
v_mov_b32_e32 v18, s27
v_addc_u32_e32 v65, vcc, v65, v18, vcc
v_lshl_b64 v[67:68], v[2:3], 2
v_add_i32_e32 v66, vcc, v66, v67
v_addc_u32_e32 v67, vcc, v65, v68, vcc
buffer_load_dword v18, v[66:67], s[28:31], 0 addr64
s_mov_b32 m0, -1
s_waitcnt vmcnt(0)
ds_write_b32 v50, v18
; Wait until the LDS write has completed before falling through.
s_waitcnt lgkmcnt(0)
BB5_10: ; %.preheader.preheader
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes masked off in BB5_8, then test the low byte of v63:
; lanes where (v63 & 0xff) != 0 continue into BB5_11, the others are parked in
; s[36:37]. Branch to BB5_50 when no lane has any of those bits set.
s_or_b64 exec, exec, s[14:15]
v_and_b32_e32 v18, 0xff, v63
v_cmp_ne_u32_e32 vcc, 0, v18
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[36:37], exec, s[14:15]
; mask branch BB5_50
s_cbranch_execz BB5_50
BB5_11: ; in Loop: Header=BB5_7 Depth=1
; Reads an index v18 from LDS at v52, forms the element index
; v64 = 8*v18 + v1, and issues two global loads with it:
;   v[73:76] <- 4 dwords at byte offset 16*v64, resource s[16:19]
;   v[65:66] <- 2 dwords at byte offset  8*v64, resource s[20:23]
; NOTE(review): presumably per-atom coordinate/charge and parameter data --
; confirm against the kernel source.
s_mov_b32 m0, -1
ds_read_b32 v18, v52
; Fill the upper halves of both resources from s[30:31] (0 and 0xf000, set
; before the loop).
s_mov_b64 s[18:19], s[30:31]
s_mov_b64 s[22:23], s[30:31]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 4
v_lshl_b64 v[68:69], v[64:65], 3
buffer_load_dwordx4 v[73:76], v[66:67], s[16:19], 0 addr64
buffer_load_dwordx2 v[65:66], v[68:69], s[20:23], 0 addr64
; v67, v69 and v72 start at zero. Only lanes with bit 0 of v63 set enter
; BB5_12; s[14:15] keeps the other lanes for BB5_15 to restore.
v_mov_b32_e32 v67, 0
v_and_b32_e32 v22, 1, v63
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v72, v67
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; Both buffer loads must have landed before any successor uses them.
s_waitcnt vmcnt(0)
; mask branch BB5_15
s_cbranch_execz BB5_15
BB5_12: ; in Loop: Header=BB5_7 Depth=1
; Reads four dwords v[77:80] from LDS at v53 (two consecutive 64-bit
; elements), forms the component differences
;   v26 = v77 - v73, v22 = v78 - v74, v30 = v79 - v75
; and their sum of squares v38 = v22^2 + v26^2 + v30^2.
; The threshold v46 is s5 when (v32 != 22) or (v40 != v18) or (v1 > v2) holds
; for the lane, and 0 otherwise. Lanes with v38 < v46 go on to BB5_13.
; NOTE(review): reads like a squared-distance cut-off test with a
; pair-exclusion mask -- confirm against the kernel source.
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset1:1
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v22, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v26, v73, v77
v_mul_f32_e32 v38, v22, v22
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v26, v26
; v67/v69/v72 are cleared again so they are zero if BB5_13 is skipped.
v_mov_b32_e32 v67, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v72, v67
; s[18:19] keeps the lanes that fail the comparison; BB5_14 restores them.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_14
s_cbranch_execz BB5_14
BB5_13: ; in Loop: Header=BB5_7 Depth=1
; Per-lane arithmetic for lanes that passed the v38 < v46 test in BB5_12.
; Inputs: v38 (sum of squares), differences v26/v22/v30, loaded values
; v[73:76], v[77:80], v[65:66], the mask word v14, scalars s24, v47, v51.
; Outputs: v43/v44/v45 are decremented by v38*(v26, v22, v30) using the final
; v38, and v67/v69/v72 are set to -v70*(v26, v22, v30).
; NOTE(review): presumably the Lennard-Jones + Ewald pair force -- confirm
; against the kernel source.
; Clamp v38 from below, then v70 = 1/sqrt(v38) and v78 = v70^2.
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v70, v38
; v69 = 1.0 if bit 0 of v14 is set, else 0.
v_and_b32_e32 v67, 1, v14
v_cmp_eq_u32_e32 vcc, 1, v67
v_cndmask_b32_e64 v69, 0, 1.0, vcc
v_mul_f32_e32 v78, v70, v70
; v79 = v69 * v78^3.
v_mul_f32_e32 v67, v78, v78
v_mul_f32_e32 v67, v69, v67
s_mov_b32 m0, -1
v_mul_f32_e32 v79, v78, v67
; Two dwords from LDS at v54 are combined with v[65:66] below.
ds_read_b64 v[67:68], v54
; v71 = v47 * v38 and v72 = v71^2 are the arguments of the polynomials below.
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v72, v71, v71
v_mov_b32_e32 v77, 0x3a92b707
; v46 = product of the fourth components of the two 4-dword vectors.
v_mul_f32_e32 v46, v76, v80
s_waitcnt lgkmcnt(0)
; v67 = v65*v67 - v79*(v66*v68).
v_mul_f32_e32 v68, v66, v68
v_mul_f32_e32 v68, v79, v68
v_mad_f32 v67, v65, v67, -v68
; Rational approximation: the denominator is built in v68 (then inverted with
; v_rcp), the numerator in v72; both are polynomials in v72 with a v71-weighted
; second part, using the literal constants as coefficients.
v_madak_f32_e32 v68, v77, v72, 0x3ded3cb2
v_mov_b32_e32 v77, 0x3c739487
v_madak_f32_e32 v77, v77, v72, 0x3f01e2bc
v_mad_f32 v68, v68, v72, 1.0
v_mac_f32_e32 v68, v71, v77
v_mov_b32_e32 v77, 0xb2951928
v_rcp_f32_e32 v68, v68
v_madak_f32_e32 v77, v77, v72, 0xb85ffb93
v_mov_b32_e32 v80, 0x35c55945
v_madak_f32_e32 v80, v80, v72, 0x3a83ca0c
; v38 is reused: 1.0 if s24 > v38 else 0, then scaled by v78 and v79.
v_cmp_gt_f32_e32 vcc, s24, v38
v_madak_f32_e32 v77, v77, v72, 0xbc9ded90
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_madak_f32_e32 v80, v80, v72, 0x3d8eaf3b
v_madak_f32_e32 v72, v77, v72, 0xbf409397
v_mul_f32_e32 v38, v38, v78
v_mac_f32_e32 v72, v71, v80
; v68 = numerator * (v51 / denominator) + v70 * (v69 * v78).
v_mul_f32_e32 v68, v51, v68
v_mul_f32_e32 v68, v72, v68
v_mul_f32_e32 v69, v69, v78
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v68, v70, v69
; v70 = v46*v68 - v67*v38 and v38 = v38*v67 - v68*v46.
v_mul_f32_e32 v69, v67, v38
v_mad_f32 v70, v46, v68, -v69
v_mul_f32_e32 v46, v68, v46
v_mad_f32 v38, v38, v67, -v46
; Accumulate into v43..v45 and initialise v67/v69/v72 for this pass.
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
v_mul_f32_e64 v72, v70, -v30
v_mul_f32_e64 v69, v70, -v22
v_mul_f32_e64 v67, v70, -v26
BB5_14: ; %Flow1186
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes that failed the v38 < v46 test in BB5_12.
s_or_b64 exec, exec, s[18:19]
BB5_15: ; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes whose bit 0 of v63 was clear, then select lanes by
; bit 1 of v63 for the next unrolled step (BB5_16); s[14:15] keeps the rest.
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 1, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_19
s_cbranch_execz BB5_19
BB5_16: ; in Loop: Header=BB5_7 Depth=1
; Same test as BB5_12 for the next LDS entry: reads v[77:80] from v53 at
; 64-bit element offsets 16 and 17, compares v62 (= v40 | 1) with v18, and
; computes v22 = v77 - v73, v26 = v78 - v74, v30 = v79 - v75 and
; v38 = v26^2 + v22^2 + v30^2. Lanes with v38 < v46 go on to BB5_17.
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[2:3], s[0:1] or (v62 != v18) holds for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
; s[18:19] keeps the lanes that fail the comparison; BB5_18 restores them.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_18
s_cbranch_execz BB5_18
BB5_17: ; in Loop: Header=BB5_7 Depth=1
; Same arithmetic as BB5_13 with different temporaries, for lanes that passed
; the test in BB5_16. Here the results are accumulated:
;   v67/v69/v72 += v38 * (v22, v26, v30)
;   v35/v36/v37 += v68 * (v22, v26, v30)
; v77 = 1.0 if bit 1 of v14 is set, else 0.
v_lshrrev_b32_e32 v68, 1, v14
v_and_b32_e32 v68, 1, v68
; Clamp v38 from below, then v68 = 1/sqrt(v38) and v78 = v68^2.
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = product of the fourth components of the two 4-dword vectors.
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v78, v68, v68
; v79 = v77 * v78^3.
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
; Two dwords from LDS at v54 + 64 bytes.
ds_read_b64 v[70:71], v54 offset:64
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it is consumed by the v_cndmask further down.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v70 = v65*v70 - v79*(v66*v71).
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
; v71 = v47 * v38 and v80 = v71^2 are the polynomial arguments; the
; denominator is built in v81 and the numerator in v80.
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v80, v71, v71
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v71, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v71, v83
v_rcp_f32_e32 v71, v81
; v38 is reused: 1.0 if s24 > v38 else 0, then scaled by v78 and v79.
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
; v71 = numerator * (v51 / denominator) + v68 * (v77 * v78).
v_mul_f32_e32 v71, v51, v71
v_mul_f32_e32 v71, v80, v71
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v71, v68, v77
; v68 = v46*v71 - v70*v38 and v38 = v38*v70 - v71*v46.
v_mul_f32_e32 v68, v70, v38
v_mad_f32 v68, v46, v71, -v68
v_mul_f32_e32 v46, v71, v46
v_mad_f32 v38, v38, v70, -v46
; Accumulate both result triples.
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v37, v30, v68, v37
v_mad_f32 v36, v26, v68, v36
v_mac_f32_e32 v35, v22, v68
BB5_18: ; %Flow1185
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes that failed the v38 < v46 test in BB5_16.
s_or_b64 exec, exec, s[18:19]
BB5_19: ; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes whose bit 1 of v63 was clear, then select lanes by
; bit 2 of v63 for the next unrolled step (BB5_20); s[14:15] keeps the rest.
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 2, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_23
s_cbranch_execz BB5_23
BB5_20: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_22
s_cbranch_execz BB5_22
BB5_21: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v68, 2, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:128
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v80, v71, v71
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v71, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v71, v83
v_rcp_f32_e32 v71, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v71, v51, v71
v_mul_f32_e32 v71, v80, v71
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v71, v68, v77
v_mul_f32_e32 v68, v70, v38
v_mad_f32 v68, v46, v71, -v68
v_mul_f32_e32 v46, v71, v46
v_mad_f32 v38, v38, v70, -v46
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v29, v30, v68, v29
v_mad_f32 v28, v26, v68, v28
v_mac_f32_e32 v27, v22, v68
BB5_22: ; %Flow1184
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_23: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 3, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_27
s_cbranch_execz BB5_27
BB5_24: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_26
s_cbranch_execz BB5_26
BB5_25: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v68, 3, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:192
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v80, v71, v71
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v71, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v71, v83
v_rcp_f32_e32 v71, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v71, v51, v71
v_mul_f32_e32 v71, v80, v71
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v71, v68, v77
v_mul_f32_e32 v68, v70, v38
v_mad_f32 v68, v46, v71, -v68
v_mul_f32_e32 v46, v71, v46
v_mad_f32 v38, v38, v70, -v46
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v25, v30, v68, v25
v_mad_f32 v24, v26, v68, v24
v_mac_f32_e32 v23, v22, v68
BB5_26: ; %Flow1183
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_27: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 4, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_31
s_cbranch_execz BB5_31
BB5_28: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_30
s_cbranch_execz BB5_30
BB5_29: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v68, 4, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:256
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v80, v71, v71
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v71, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v71, v83
v_rcp_f32_e32 v71, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v71, v51, v71
v_mul_f32_e32 v71, v80, v71
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v71, v68, v77
v_mul_f32_e32 v68, v70, v38
v_mad_f32 v68, v46, v71, -v68
v_mul_f32_e32 v46, v71, v46
v_mad_f32 v38, v38, v70, -v46
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v21, v30, v68, v21
v_mad_f32 v20, v26, v68, v20
v_mac_f32_e32 v19, v22, v68
BB5_30: ; %Flow1182
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_31: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 5, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_35
s_cbranch_execz BB5_35
BB5_32: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_34
s_cbranch_execz BB5_34
BB5_33: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v68, 5, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:320
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v80, v71, v71
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v71, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v71, v83
v_rcp_f32_e32 v71, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v71, v51, v71
v_mul_f32_e32 v71, v80, v71
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v71, v68, v77
v_mul_f32_e32 v68, v70, v38
v_mad_f32 v68, v46, v71, -v68
v_mul_f32_e32 v46, v71, v46
v_mad_f32 v38, v38, v70, -v46
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v17, v30, v68, v17
v_mad_f32 v16, v26, v68, v16
v_mac_f32_e32 v15, v22, v68
BB5_34: ; %Flow1181
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_35: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 6, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_39
s_cbranch_execz BB5_39
BB5_36: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v22, v73, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[18:19]
v_subrev_f32_e32 v30, v75, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_38
s_cbranch_execz BB5_38
BB5_37: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v68, 6, v14
v_and_b32_e32 v68, 1, v68
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v68
v_rsq_f32_e32 v68, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v76, v80
v_mul_f32_e32 v78, v68, v68
v_mul_f32_e32 v70, v78, v78
v_mul_f32_e32 v70, v77, v70
v_mul_f32_e32 v79, v78, v70
ds_read_b64 v[70:71], v54 offset:384
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v71, v66, v71
v_mul_f32_e32 v71, v79, v71
v_mad_f32 v70, v65, v70, -v71
v_mul_f32_e32 v71, v47, v38
v_mul_f32_e32 v80, v71, v71
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v71, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v71, v83
v_rcp_f32_e32 v71, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v71, v51, v71
v_mul_f32_e32 v71, v80, v71
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v71, v68, v77
v_mul_f32_e32 v68, v70, v38
v_mad_f32 v68, v46, v71, -v68
v_mul_f32_e32 v46, v71, v46
v_mad_f32 v38, v38, v70, -v46
v_mad_f32 v72, v30, v38, v72
v_mad_f32 v69, v26, v38, v69
v_mac_f32_e32 v67, v22, v38
v_mad_f32 v13, v30, v68, v13
v_mad_f32 v12, v26, v68, v12
v_mac_f32_e32 v11, v22, v68
BB5_38: ; %Flow1180
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_39: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[14:15]
v_lshrrev_b32_e32 v22, 7, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_43
s_cbranch_execz BB5_43
BB5_40: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[18:19], exec, s[2:3]
s_or_b64 s[18:19], s[18:19], vcc
s_and_b64 s[22:23], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v22, v74, v78
s_or_b64 s[18:19], s[22:23], s[18:19]
v_subrev_f32_e32 v18, v73, v77
v_mul_f32_e32 v30, v22, v22
v_cndmask_b32_e64 v38, 0, 1.0, s[18:19]
v_subrev_f32_e32 v26, v75, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_42
s_cbranch_execz BB5_42
BB5_41: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v46, 7, v14
v_and_b32_e32 v46, 1, v46
v_max_f32_e32 v30, 0x34cd15ae, v30
v_cmp_eq_u32_e32 vcc, 1, v46
v_rsq_f32_e32 v46, v30
v_cndmask_b32_e64 v68, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mov_b32_e32 v75, 0x3c739487
v_mul_f32_e32 v73, v46, v46
v_mul_f32_e32 v70, v73, v73
v_mul_f32_e32 v70, v68, v70
v_mul_f32_e32 v74, v73, v70
ds_read_b64 v[70:71], v54 offset:448
v_mul_f32_e32 v38, v76, v80
v_mov_b32_e32 v76, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v30
v_mul_f32_e32 v68, v68, v73
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v66, v66, v71
v_mul_f32_e32 v66, v74, v66
v_mad_f32 v65, v65, v70, -v66
v_mul_f32_e32 v66, v47, v30
v_mul_f32_e32 v70, v66, v66
v_mov_b32_e32 v71, 0x3a92b707
v_madak_f32_e32 v71, v71, v70, 0x3ded3cb2
v_madak_f32_e32 v75, v75, v70, 0x3f01e2bc
v_mad_f32 v71, v71, v70, 1.0
v_mac_f32_e32 v71, v66, v75
v_mov_b32_e32 v75, 0xb2951928
v_madak_f32_e32 v75, v75, v70, 0xb85ffb93
v_madak_f32_e32 v76, v76, v70, 0x3a83ca0c
v_madak_f32_e32 v75, v75, v70, 0xbc9ded90
v_madak_f32_e32 v76, v76, v70, 0x3d8eaf3b
v_madak_f32_e32 v70, v75, v70, 0xbf409397
v_mac_f32_e32 v70, v66, v76
v_rcp_f32_e32 v66, v71
v_cndmask_b32_e64 v30, 0, 1.0, vcc
v_mul_f32_e32 v30, v30, v73
v_mul_f32_e32 v30, v74, v30
v_mul_f32_e32 v66, v51, v66
v_mul_f32_e32 v66, v70, v66
v_mac_f32_e32 v66, v46, v68
v_mul_f32_e32 v46, v65, v30
v_mad_f32 v46, v38, v66, -v46
v_mul_f32_e32 v38, v66, v38
v_mad_f32 v30, v30, v65, -v38
v_mad_f32 v72, v26, v30, v72
v_mad_f32 v69, v22, v30, v69
v_mac_f32_e32 v67, v18, v30
v_mad_f32 v10, v26, v46, v10
v_mad_f32 v9, v22, v46, v9
v_mac_f32_e32 v8, v18, v46
BB5_42: ; %Flow1179
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_43: ; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes parked in s[14:15], then store the per-lane values
; v67, v69 and v72 to LDS at addresses v5, v6 and v7.
; Only lanes with v2 < 3 (signed) continue into BB5_44; s[18:19] keeps the
; other lanes for BB5_49 to restore.
s_or_b64 exec, exec, s[14:15]
s_mov_b32 m0, -1
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v5, v67
ds_write_b32 v6, v69
ds_write_b32 v7, v72
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[18:19], exec, s[14:15]
; The three LDS writes must complete before any successor reads LDS.
s_waitcnt lgkmcnt(0)
; mask branch BB5_49
s_cbranch_execz BB5_49
BB5_44: ; in Loop: Header=BB5_7 Depth=1
; Starts a per-lane sum over LDS: v26 = s4 + 4*(v0 + 64*v2) is the address of
; the first dword, which is loaded into v18. v22 = 64*v2 stays live for BB5_45.
v_lshlrev_b32_e32 v22, 6, v2
v_add_i32_e32 v18, vcc, v0, v22
v_lshlrev_b32_e32 v18, 2, v18
v_add_i32_e32 v26, vcc, s4, v18
s_mov_b32 m0, -1
ds_read_b32 v18, v26
; Lanes with (v0 | 1) < (v0 + 8), signed, go on to add the remaining terms
; in BB5_45; s[14:15] keeps the others for BB5_46 to restore.
v_add_i32_e32 v30, vcc, 8, v0
v_or_b32_e32 v38, 1, v0
v_cmp_lt_i32_e32 vcc, v38, v30
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
s_waitcnt lgkmcnt(0)
; mask branch BB5_46
s_cbranch_execz BB5_46
BB5_45: ; in Loop: Header=BB5_7 Depth=1
; Adds seven more LDS dwords to v18: dword elements 1..4 and byte offset 28
; relative to v26, plus dword elements 2 and 3 relative to
; v22 = s4 + 4*((v0 | 3) + 64*v2).
s_mov_b32 m0, -1
ds_read2_b32 v[65:66], v26 offset0:1 offset1:2
v_or_b32_e32 v30, 3, v0
v_add_i32_e32 v22, vcc, v30, v22
v_lshlrev_b32_e32 v22, 2, v22
ds_read2_b32 v[67:68], v26 offset0:3 offset1:4
; First batch of LDS reads has to land before v65..v68 are consumed.
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v65
v_add_i32_e32 v22, vcc, s4, v22
v_add_f32_e32 v18, v66, v18
; v[65:66] and v26 are reused as destinations for the second batch.
ds_read2_b32 v[65:66], v22 offset0:2 offset1:3
ds_read_b32 v26, v26 offset:28
v_add_f32_e32 v18, v67, v18
v_add_f32_e32 v18, v68, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v65, v18
v_add_f32_e32 v18, v66, v18
v_add_f32_e32 v18, v26, v18
BB5_46: ; %._crit_edge.i118
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes parked in BB5_44, then set up the atomic update:
; element index = 3*v64 + v2, byte offset v[66:67] = sext(index) << 2,
; absolute address v[64:65] = s[12:13] + byte offset.
; The current value at that location is loaded into v67.
s_or_b64 exec, exec, s[14:15]
v_mul_lo_i32 v22, v64, 3
v_mov_b32_e32 v26, s13
; s[14:15] temporarily completes the resource s[12:15] for the load below.
s_mov_b64 s[14:15], s[30:31]
v_add_i32_e32 v64, vcc, v22, v2
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 2
v_add_i32_e32 v64, vcc, s12, v66
v_addc_u32_e32 v65, vcc, v67, v26, vcc
buffer_load_dword v67, v[66:67], s[12:15], 0 addr64
; s[14:15] is then cleared: BB5_47 uses it to collect the lanes whose
; compare-and-swap has succeeded.
s_mov_b64 s[14:15], 0
s_waitcnt vmcnt(0)
BB5_47: ; Parent Loop BB5_7 Depth=1
; => This Inner Loop Header: Depth=2
; Compare-and-swap retry loop that adds v18 to the float stored at v[64:65].
; v67 = expected old value, v66 = v18 + v67 = proposed new value.
; v[68:69] = {new, expected} go into the cmpswap; with glc the value found in
; memory comes back in v68.
v_add_f32_e32 v66, v18, v67
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v68, v66
buffer_atomic_cmpswap v[68:69], v[64:65], s[28:31], 0 addr64 glc
; NOTE(review): v22 is written twice in a row and is not read in this block;
; the first value (-1) is overwritten before it can be used.
v_mov_b32_e32 v22, -1
v_mov_b32_e32 v22, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; A lane is done when the returned value equals the expected one. Finished
; lanes accumulate in s[14:15] and are removed from exec; the others retry
; with the returned value as the new expectation.
v_cmp_eq_u32_e32 vcc, v68, v67
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v67, v68
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_47
; BB#48: ; %Flow1177
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes that left the compare-and-swap loop BB5_47.
s_or_b64 exec, exec, s[14:15]
BB5_49: ; %Flow1178
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes masked off by the v2 < 3 test in BB5_43.
s_or_b64 exec, exec, s[18:19]
BB5_50: ; %Flow1187
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes parked in s[36:37] by BB5_10, then test the second byte
; of v63: lanes where (v63 & 0xff00) != 0 continue into BB5_51, the others are
; kept in s[14:15]. Branch to BB5_90 when no lane has any of those bits set.
s_or_b64 exec, exec, s[36:37]
v_and_b32_e32 v18, 0xff00, v63
v_cmp_ne_u32_e32 vcc, 0, v18
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_90
s_cbranch_execz BB5_90
BB5_51: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read_b32 v18, v52 offset:4
s_mov_b64 s[36:37], s[16:17]
s_mov_b64 s[38:39], s[30:31]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 4
buffer_load_dwordx4 v[67:70], v[66:67], s[36:39], 0 addr64
v_lshl_b64 v[71:72], v[64:65], 3
s_mov_b64 s[36:37], s[20:21]
buffer_load_dwordx2 v[65:66], v[71:72], s[36:39], 0 addr64
v_lshrrev_b32_e32 v22, 8, v63
v_mov_b32_e32 v71, 0
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
s_waitcnt vmcnt(0)
; mask branch BB5_55
s_cbranch_execz BB5_55
BB5_52: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset1:1
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v26, v67, v77
v_mul_f32_e32 v38, v22, v22
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v26, v26
v_mov_b32_e32 v71, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_54
s_cbranch_execz BB5_54
BB5_53: ; in Loop: Header=BB5_7 Depth=1
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v74, v38
v_lshrrev_b32_e32 v71, 8, v14
v_and_b32_e32 v71, 1, v71
v_cmp_eq_u32_e32 vcc, 1, v71
v_mul_f32_e32 v78, v74, v74
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v71, v78, v78
v_mul_f32_e32 v71, v73, v71
s_mov_b32 m0, -1
v_mul_f32_e32 v79, v78, v71
ds_read_b64 v[71:72], v54
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v76, v75, v75
v_mov_b32_e32 v77, 0x3a92b707
v_mul_f32_e32 v46, v70, v80
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v72, v66, v72
v_mul_f32_e32 v72, v79, v72
v_mad_f32 v71, v65, v71, -v72
v_madak_f32_e32 v72, v77, v76, 0x3ded3cb2
v_mov_b32_e32 v77, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3f01e2bc
v_mad_f32 v72, v72, v76, 1.0
v_mac_f32_e32 v72, v75, v77
v_mov_b32_e32 v77, 0xb2951928
v_rcp_f32_e32 v72, v72
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_mov_b32_e32 v80, 0x35c55945
v_madak_f32_e32 v80, v80, v76, 0x3a83ca0c
v_cmp_gt_f32_e32 vcc, s24, v38
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_madak_f32_e32 v80, v80, v76, 0x3d8eaf3b
v_madak_f32_e32 v76, v77, v76, 0xbf409397
v_mul_f32_e32 v38, v38, v78
v_mac_f32_e32 v76, v75, v80
v_mul_f32_e32 v72, v51, v72
v_mul_f32_e32 v72, v76, v72
v_mul_f32_e32 v73, v73, v78
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v72, v74, v73
v_mul_f32_e32 v73, v71, v38
v_mad_f32 v74, v46, v72, -v73
v_mul_f32_e32 v46, v72, v46
v_mad_f32 v38, v38, v71, -v46
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
v_mul_f32_e64 v76, v74, -v30
v_mul_f32_e64 v73, v74, -v22
v_mul_f32_e64 v71, v74, -v26
BB5_54: ; %Flow1175
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_55: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 9, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_59
s_cbranch_execz BB5_59
BB5_56: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_58
s_cbranch_execz BB5_58
BB5_57: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 9, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:64
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v37, v30, v72, v37
v_mad_f32 v36, v26, v72, v36
v_mac_f32_e32 v35, v22, v72
BB5_58: ; %Flow1174
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_59: ; in Loop: Header=BB5_7 Depth=1
; Pair slot gated by bit 10 of v63. Lanes that pass the bit test and the
; distance test evaluate one pair term and add equal-and-opposite
; contributions to v71/v73/v76 and to v27/v28/v29.
; NOTE(review): the data flow and constants look like a GROMACS nonbonded
; LJ + Ewald force kernel; confirm against the OpenCL source before relying
; on the physical interpretation given in the hedged notes below.
;
; Re-enable the lanes parked in s[18:19] by the preceding masked region.
s_or_b64 exec, exec, s[18:19]
; vcc = (bit 10 of v63 == 1)
v_lshrrev_b32_e32 v22, 10, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc. After the xor, s[18:19] holds the lanes that were active but
; failed the test, so the s_or_b64 at the next label restores them.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_63
s_cbranch_execz BB5_63
BB5_60: ; in Loop: Header=BB5_7 Depth=1
; m0 is set to -1 ahead of every DS (LDS) access in this code.
s_mov_b32 m0, -1
; v[77:80] = two consecutive 64-bit LDS words at v53 + 32*8 and v53 + 33*8
; (ds_read2_b64 offsets are in 8-byte units).
; presumably x, y, z plus a fourth per-atom value (charge?) - TODO confirm
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
; s[22:23] = (exec & s[2:3]) | (v61 != v18) | (exec & s[0:1]),
; built across the next scalar instructions.
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
; wait for the LDS read before touching v77..v80
s_waitcnt lgkmcnt(0)
; Component differences (v_subrev: dst = src1 - src0):
; v22 = v77 - v67, v26 = v78 - v68, v30 = v79 - v69
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
; v38 = v22^2 + v26^2 + v30^2 (squared length of the difference vector)
v_mul_f32_e32 v38, v26, v26
; v46 = 1.0 for lanes in s[22:23], else 0; scaled by s5 below, so lanes outside
; the mask get a zero threshold and cannot pass the "<" test.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
; presumably s5 is the squared cut-off radius - TODO confirm
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
; Keep only lanes with v38 < threshold; the others are parked in s[22:23].
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_62
s_cbranch_execz BB5_62
BB5_61: ; in Loop: Header=BB5_7 Depth=1
; v77 (set below) = 1.0 if bit 10 of v14 is set, else 0.0; it multiplies the
; r^-6 term and the 1/r^3 term further down.
v_lshrrev_b32_e32 v72, 10, v14
v_and_b32_e32 v72, 1, v72
; Clamp the squared distance from below (0x34cd15ae is about 3.82e-7) before
; taking the reciprocal square root.
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
; v72 = 1/sqrt(v38)
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70 * v80 (product of the fourth components; presumably q_i * q_j)
v_mul_f32_e32 v46, v70, v80
; v78 = 1/r^2, v74 = 1/r^4, v79 = v77 * 1/r^6
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Two dwords from LDS at v54 + 128 (byte offset).
; presumably the partner atom's pair parameters - TODO confirm
ds_read_b64 v[74:75], v54 offset:128
; Leading coefficients of the polynomials evaluated below.
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); turned into a 0/1 float factor in v38 further down.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*lds0 - v79*(v66*lds1)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; z = v47 * r^2 (v75), z^2 (v80). Below, two polynomials in z are evaluated
; with their even and odd parts as separate chains:
;   den (v81) = (0x3a92b707*z^2 + 0x3ded3cb2)*z^2 + 1.0 + z*(0x3c739487*z^2 + 0x3f01e2bc)
;   num (v80) = even chain in v82 + z * odd chain in v83
; NOTE(review): resembles an Ewald real-space correction fit - confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = 1/den
v_rcp_f32_e32 v75, v81
; v38 = (s24 > r^2 ? 1.0 : 0) * 1/r^2, then multiplied by v79 (v77 * 1/r^6)
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
; v75 = v51 * num / den + (1/r) * (v77 * 1/r^2)
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46, i.e. the same scalar
; with opposite signs.
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; Accumulate difference-vector * scalar: v71/v73/v76 use v38, v27/v28/v29 use
; v72 (x, y, z order: v22, v26, v30).
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v29, v30, v72, v29
v_mad_f32 v28, v26, v72, v28
v_mac_f32_e32 v27, v22, v72
BB5_62: ; %Flow1173
; in Loop: Header=BB5_7 Depth=1
; Restore the lanes that failed the distance test.
s_or_b64 exec, exec, s[22:23]
BB5_63: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 11, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_67
s_cbranch_execz BB5_67
BB5_64: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_66
s_cbranch_execz BB5_66
BB5_65: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 11, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:192
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v25, v30, v72, v25
v_mad_f32 v24, v26, v72, v24
v_mac_f32_e32 v23, v22, v72
BB5_66: ; %Flow1172
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_67: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 12, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_71
s_cbranch_execz BB5_71
BB5_68: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_70
s_cbranch_execz BB5_70
BB5_69: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 12, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:256
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v21, v30, v72, v21
v_mad_f32 v20, v26, v72, v20
v_mac_f32_e32 v19, v22, v72
BB5_70: ; %Flow1171
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_71: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 13, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_75
s_cbranch_execz BB5_75
BB5_72: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_74
s_cbranch_execz BB5_74
BB5_73: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 13, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:320
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v17, v30, v72, v17
v_mad_f32 v16, v26, v72, v16
v_mac_f32_e32 v15, v22, v72
BB5_74: ; %Flow1170
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_75: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 14, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_79
s_cbranch_execz BB5_79
BB5_76: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_78
s_cbranch_execz BB5_78
BB5_77: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 14, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:384
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v13, v30, v72, v13
v_mad_f32 v12, v26, v72, v12
v_mac_f32_e32 v11, v22, v72
BB5_78: ; %Flow1169
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_79: ; in Loop: Header=BB5_7 Depth=1
; Pair slot gated by bit 15 of v63. Same arithmetic as the slots before it,
; but this block overwrites v18 and v65..v70 (which it reads as inputs at its
; top), using them as scratch. Results are accumulated into v71/v73/v76 and
; v8/v9/v10.
; NOTE(review): physical meaning (LJ + Ewald pair force) is inferred from the
; data flow, not visible in this excerpt - confirm against the kernel source.
;
; Re-enable the lanes parked in s[18:19] by the preceding masked region.
s_or_b64 exec, exec, s[18:19]
; vcc = (bit 15 of v63 == 1)
v_lshrrev_b32_e32 v22, 15, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] = lanes that were active but failed the test.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_83
s_cbranch_execz BB5_83
BB5_80: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 112*8 (offsets are in 8-byte units).
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
; s[22:23] = (exec & s[2:3]) | (v33 != v18) | (exec & s[0:1]).
; v18 is compared here before it is overwritten with a difference below.
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; Differences (dst = src1 - src0): v18 = v77 - v67, v22 = v78 - v68,
; v26 = v79 - v69
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v18, v67, v77
; v30 = v18^2 + v22^2 + v26^2
v_mul_f32_e32 v30, v22, v22
; v38 = s5 for lanes in s[22:23], 0 otherwise (threshold for the "<" test)
v_cndmask_b32_e64 v38, 0, 1.0, s[22:23]
v_subrev_f32_e32 v26, v69, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
v_cmp_lt_f32_e32 vcc, v30, v38
; Keep only lanes with v30 < threshold; the others are parked in s[22:23].
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_82
s_cbranch_execz BB5_82
BB5_81: ; in Loop: Header=BB5_7 Depth=1
; v69 (set below) = 1.0 if bit 15 of v14 is set, else 0.0
v_lshrrev_b32_e32 v46, 15, v14
v_and_b32_e32 v46, 1, v46
; Clamp the squared distance from below (0x34cd15ae is about 3.82e-7).
v_max_f32_e32 v30, 0x34cd15ae, v30
v_cmp_eq_u32_e32 vcc, 1, v46
; v46 = 1/sqrt(v30)
v_rsq_f32_e32 v46, v30
; v38 = v70 * v80, taken before v70 is reused for 1/r^2
v_mul_f32_e32 v38, v70, v80
v_cndmask_b32_e64 v69, 0, 1.0, vcc
s_mov_b32 m0, -1
; v70 = 1/r^2, v67 = v69 * 1/r^4, v72 = v69 * 1/r^6
v_mul_f32_e32 v70, v46, v46
v_mul_f32_e32 v67, v70, v70
v_mul_f32_e32 v67, v69, v67
v_mul_f32_e32 v72, v70, v67
; Two dwords from LDS at v54 + 448 (byte offset).
ds_read_b64 v[67:68], v54 offset:448
v_mov_b32_e32 v74, 0x3c739487
v_mov_b32_e32 v75, 0x35c55945
; vcc = (s24 > v30)
v_cmp_gt_f32_e32 vcc, s24, v30
s_waitcnt lgkmcnt(0)
; v65 = v65*lds0 - v72*(v66*lds1)
v_mul_f32_e32 v66, v66, v68
v_mul_f32_e32 v66, v72, v66
v_mad_f32 v65, v65, v67, -v66
; z = v47 * r^2 (v66), z^2 (v67); denominator polynomial in v68, numerator
; polynomial in v67, each evaluated as an even chain plus z * odd chain.
v_mul_f32_e32 v66, v47, v30
v_mul_f32_e32 v67, v66, v66
v_mov_b32_e32 v68, 0x3a92b707
v_madak_f32_e32 v68, v68, v67, 0x3ded3cb2
v_madak_f32_e32 v74, v74, v67, 0x3f01e2bc
v_mad_f32 v68, v68, v67, 1.0
v_mac_f32_e32 v68, v66, v74
v_mov_b32_e32 v74, 0xb2951928
v_madak_f32_e32 v74, v74, v67, 0xb85ffb93
v_madak_f32_e32 v75, v75, v67, 0x3a83ca0c
v_madak_f32_e32 v74, v74, v67, 0xbc9ded90
v_madak_f32_e32 v75, v75, v67, 0x3d8eaf3b
v_madak_f32_e32 v67, v74, v67, 0xbf409397
v_mac_f32_e32 v67, v66, v75
; v66 = 1/den
v_rcp_f32_e32 v66, v68
; v30 = (s24 > r^2 ? 1.0 : 0) * 1/r^2 * v72
v_cndmask_b32_e64 v30, 0, 1.0, vcc
v_mul_f32_e32 v30, v30, v70
v_mul_f32_e32 v30, v72, v30
; v66 = v51 * num / den + (1/r) * (v69 * 1/r^2)
v_mul_f32_e32 v66, v51, v66
v_mul_f32_e32 v66, v67, v66
v_mul_f32_e32 v67, v69, v70
v_mac_f32_e32 v66, v46, v67
; v46 = v38*v66 - v65*v30 and v30 = v30*v65 - v66*v38 (same scalar, opposite
; signs).
v_mul_f32_e32 v46, v65, v30
v_mad_f32 v46, v38, v66, -v46
v_mul_f32_e32 v38, v66, v38
v_mad_f32 v30, v30, v65, -v38
; Accumulate difference-vector * scalar: v71/v73/v76 use v30, v8/v9/v10 use
; v46 (x, y, z order: v18, v22, v26).
v_mad_f32 v76, v26, v30, v76
v_mad_f32 v73, v22, v30, v73
v_mac_f32_e32 v71, v18, v30
v_mad_f32 v10, v26, v46, v10
v_mad_f32 v9, v22, v46, v9
v_mac_f32_e32 v8, v18, v46
BB5_82: ; %Flow1168
; in Loop: Header=BB5_7 Depth=1
; Restore the lanes that failed the distance test.
s_or_b64 exec, exec, s[22:23]
BB5_83: ; in Loop: Header=BB5_7 Depth=1
; Flush the accumulators v71/v73/v76: stage them in LDS, let lanes with
; v2 < 3 sum eight staged values each, and add the sum to a dword in global
; memory with a compare-and-swap retry loop (a float atomic add).
;
; Re-enable the lanes parked in s[18:19] by the preceding masked region.
s_or_b64 exec, exec, s[18:19]
s_mov_b32 m0, -1
; vcc = (3 > v2), consumed by the s_and_saveexec below
v_cmp_gt_i32_e32 vcc, 3, v2
; Store the three accumulators to LDS at byte addresses v5, v6 and v7.
ds_write_b32 v5, v71
ds_write_b32 v6, v73
ds_write_b32 v7, v76
; Only lanes with v2 < 3 continue; the rest are parked in s[18:19].
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; wait for the LDS writes issued above
s_waitcnt lgkmcnt(0)
; mask branch BB5_89
s_cbranch_execz BB5_89
BB5_84: ; in Loop: Header=BB5_7 Depth=1
; v26 = s4 + 4*(v0 + 64*v2): LDS byte address of the first value to sum.
v_lshlrev_b32_e32 v22, 6, v2
v_add_i32_e32 v18, vcc, v0, v22
v_lshlrev_b32_e32 v18, 2, v18
v_add_i32_e32 v26, vcc, s4, v18
s_mov_b32 m0, -1
; v18 = first value; it becomes the running sum.
ds_read_b32 v18, v26
; Guard (v0 | 1) < (v0 + 8): true unless v0 + 8 wraps.
; NOTE(review): looks like the entry test of a fully unrolled loop - confirm.
v_add_i32_e32 v30, vcc, 8, v0
v_or_b32_e32 v38, 1, v0
v_cmp_lt_i32_e32 vcc, v38, v30
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
s_waitcnt lgkmcnt(0)
; mask branch BB5_86
s_cbranch_execz BB5_86
BB5_85: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; dwords at v26 + 4 and v26 + 8 (ds_read2_b32 offsets are in 4-byte units)
ds_read2_b32 v[65:66], v26 offset0:1 offset1:2
; v22 = s4 + 4*((v0 | 3) + 64*v2): second base address, finished at the
; v_add_i32 below. If the two low bits of v0 are clear this equals v26 + 12,
; which makes the eight values consecutive dwords - TODO confirm.
v_or_b32_e32 v30, 3, v0
v_add_i32_e32 v22, vcc, v30, v22
v_lshlrev_b32_e32 v22, 2, v22
; dwords at v26 + 12 and v26 + 16
ds_read2_b32 v[67:68], v26 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v65
v_add_i32_e32 v22, vcc, s4, v22
v_add_f32_e32 v18, v66, v18
; dwords at v22 + 8 and v22 + 12, then the dword at v26 + 28
ds_read2_b32 v[65:66], v22 offset0:2 offset1:3
ds_read_b32 v26, v26 offset:28
v_add_f32_e32 v18, v67, v18
v_add_f32_e32 v18, v68, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v65, v18
v_add_f32_e32 v18, v66, v18
v_add_f32_e32 v18, v26, v18
BB5_86: ; %._crit_edge.i72
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
; Element index = 3*v64 + v2, sign-extended to 64 bits and scaled by 4 bytes.
v_mul_lo_i32 v22, v64, 3
v_mov_b32_e32 v26, s13
; s[36:39] = {s[12:13], s[30:31]}: buffer resource used for the initial load.
s_mov_b64 s[36:37], s[12:13]
s_mov_b64 s[38:39], s[30:31]
v_add_i32_e32 v64, vcc, v22, v2
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 2
; v[64:65] = s[12:13] + byte offset (64-bit add with carry through vcc)
v_add_i32_e32 v64, vcc, s12, v66
v_addc_u32_e32 v65, vcc, v67, v26, vcc
; v67 = current value of the destination dword (the expected value for the CAS)
buffer_load_dword v67, v[66:67], s[36:39], 0 addr64
; s[22:23] accumulates the lanes whose CAS has succeeded.
s_mov_b64 s[22:23], 0
s_waitcnt vmcnt(0)
BB5_87: ; Parent Loop BB5_7 Depth=1
; => This Inner Loop Header: Depth=2
; v68 = new value (sum + expected), v69 = expected value
v_add_f32_e32 v66, v18, v67
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v68, v66
; Compare-and-swap on the absolute address in v[64:65]; with glc the
; pre-operation memory value is returned in v68.
; NOTE(review): presumably s[28:31] is a zero-based resource descriptor, since
; the full address is supplied in v[64:65] - its setup is not visible here.
buffer_atomic_cmpswap v[68:69], v[64:65], s[28:31], 0 addr64 glc
; NOTE(review): v22 is written twice back to back and never read inside this
; loop, so the -1 is overwritten immediately. Whether 0xf000 is consumed later
; cannot be fully determined from this excerpt.
v_mov_b32_e32 v22, -1
v_mov_b32_e32 v22, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; Success when the returned value equals the expected one.
v_cmp_eq_u32_e32 vcc, v68, v67
s_or_b64 s[22:23], vcc, s[22:23]
; The returned value becomes the expected value for the next attempt.
v_mov_b32_e32 v67, v68
; Drop finished lanes from exec and retry while any lane remains.
s_andn2_b64 exec, exec, s[22:23]
s_cbranch_execnz BB5_87
; BB#88: ; %Flow1166
; in Loop: Header=BB5_7 Depth=1
; Bring back the lanes that left the retry loop.
s_or_b64 exec, exec, s[22:23]
BB5_89: ; %Flow1167
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes that were parked because v2 >= 3.
s_or_b64 exec, exec, s[18:19]
BB5_90: ; %Flow1176
; in Loop: Header=BB5_7 Depth=1
; Start of the group gated by bits 16..23 of v63: load per-entry data from two
; global buffers, zero the accumulators v71/v73/v76, then evaluate the first
; pair slot (bit 16). Results go to v43/v44/v45 and v71/v73/v76.
; NOTE(review): physical meaning (atom coordinates/charge, LJ parameters,
; Ewald correction) is inferred from the data flow - confirm against the
; kernel source.
;
; Re-enable the lanes parked in s[14:15] by the previous group.
s_or_b64 exec, exec, s[14:15]
; Run the group only for lanes where any of bits 16..23 of v63 is set.
v_and_b32_e32 v18, 0xff0000, v63
v_cmp_ne_u32_e32 vcc, 0, v18
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_130
s_cbranch_execz BB5_130
BB5_91: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v18 = dword from LDS at v52 + 8; used as an index and compared against
; v40 in BB5_92.
ds_read_b32 v18, v52 offset:8
; s[36:39] = {s[16:17], s[30:31]}: resource for the 16-byte load below.
s_mov_b64 s[36:37], s[16:17]
s_mov_b64 s[38:39], s[30:31]
s_waitcnt lgkmcnt(0)
; Element index v64 = 8*v18 + v1, sign-extended into v[64:65].
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
; v[67:70] = 16 bytes at byte offset 16*index from base s[16:17].
; presumably x, y, z and a fourth per-atom value (charge?) - TODO confirm
v_lshl_b64 v[66:67], v[64:65], 4
buffer_load_dwordx4 v[67:70], v[66:67], s[36:39], 0 addr64
; v[65:66] = 8 bytes at byte offset 8*index from base s[20:21].
; presumably two pair parameters per atom - TODO confirm
v_lshl_b64 v[71:72], v[64:65], 3
s_mov_b64 s[36:37], s[20:21]
buffer_load_dwordx2 v[65:66], v[71:72], s[36:39], 0 addr64
; vcc = (bit 16 of v63 == 1); v71 = v73 = v76 = 0 in between.
v_lshrrev_b32_e32 v22, 16, v63
v_mov_b32_e32 v71, 0
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
; exec &= vcc; s[18:19] = lanes that were active but failed the bit test.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; wait for both buffer loads
s_waitcnt vmcnt(0)
; mask branch BB5_95
s_cbranch_execz BB5_95
BB5_92: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 (offset0 defaults to 0, offset1 = 1,
; in 8-byte units).
ds_read2_b64 v[77:80], v53 offset1:1
; s[22:23] = (exec & s[2:3]) | (v40 != v18) | (exec & s[0:1])
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; Differences (dst = src1 - src0): v26 = v77 - v67, v22 = v78 - v68,
; v30 = v79 - v69. In this block the x difference is in v26 and y is in v22.
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v26, v67, v77
; v38 = v22^2 + v26^2 + v30^2
v_mul_f32_e32 v38, v22, v22
; v46 = s5 for lanes in s[22:23], 0 otherwise (threshold for the "<" test)
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v26, v26
; v71/v73/v76 are written with 0 again here.
; NOTE(review): appears redundant with the zeroing in BB5_91 - confirm.
v_mov_b32_e32 v71, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
; Keep only lanes with v38 < threshold; the others are parked in s[22:23].
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_94
s_cbranch_execz BB5_94
BB5_93: ; in Loop: Header=BB5_7 Depth=1
; Clamp the squared distance from below (0x34cd15ae is about 3.82e-7), then
; v74 = 1/sqrt(v38).
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v74, v38
; v73 (set below) = 1.0 if bit 16 of v14 is set, else 0.0
v_lshrrev_b32_e32 v71, 16, v14
v_and_b32_e32 v71, 1, v71
v_cmp_eq_u32_e32 vcc, 1, v71
; v78 = 1/r^2, v71 = v73 * 1/r^4, v79 = v73 * 1/r^6
v_mul_f32_e32 v78, v74, v74
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v71, v78, v78
v_mul_f32_e32 v71, v73, v71
s_mov_b32 m0, -1
v_mul_f32_e32 v79, v78, v71
; Two dwords from LDS at v54.
ds_read_b64 v[71:72], v54
; z = v47 * r^2 (v75), z^2 (v76)
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v76, v75, v75
v_mov_b32_e32 v77, 0x3a92b707
; v46 = v70 * v80 (product of the fourth components)
v_mul_f32_e32 v46, v70, v80
s_waitcnt lgkmcnt(0)
; v71 = v65*lds0 - v79*(v66*lds1)
v_mul_f32_e32 v72, v66, v72
v_mul_f32_e32 v72, v79, v72
v_mad_f32 v71, v65, v71, -v72
; Denominator polynomial in v72 and numerator polynomial in v76, each an even
; chain plus z * odd chain; interleaved with the s24 comparison by the
; scheduler.
v_madak_f32_e32 v72, v77, v76, 0x3ded3cb2
v_mov_b32_e32 v77, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3f01e2bc
v_mad_f32 v72, v72, v76, 1.0
v_mac_f32_e32 v72, v75, v77
v_mov_b32_e32 v77, 0xb2951928
; v72 = 1/den
v_rcp_f32_e32 v72, v72
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_mov_b32_e32 v80, 0x35c55945
v_madak_f32_e32 v80, v80, v76, 0x3a83ca0c
; vcc = (s24 > v38); v38 then becomes the 0/1 factor times 1/r^2
v_cmp_gt_f32_e32 vcc, s24, v38
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_madak_f32_e32 v80, v80, v76, 0x3d8eaf3b
v_madak_f32_e32 v76, v77, v76, 0xbf409397
v_mul_f32_e32 v38, v38, v78
v_mac_f32_e32 v76, v75, v80
; v72 = v51 * num / den + (1/r) * (v73 * 1/r^2)
v_mul_f32_e32 v72, v51, v72
v_mul_f32_e32 v72, v76, v72
v_mul_f32_e32 v73, v73, v78
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v72, v74, v73
; v74 = v46*v72 - v71*v38 and v38 = v38*v71 - v72*v46 (same scalar, opposite
; signs).
v_mul_f32_e32 v73, v71, v38
v_mad_f32 v74, v46, v72, -v73
v_mul_f32_e32 v46, v72, v46
v_mad_f32 v38, v38, v71, -v46
; v43/v44/v45 += -(difference) * v38 (x, y, z order: v26, v22, v30)
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
; First slot of the group: v71/v73/v76 are assigned, not accumulated:
; -(difference) * v74.
v_mul_f32_e64 v76, v74, -v30
v_mul_f32_e64 v73, v74, -v22
v_mul_f32_e64 v71, v74, -v26
BB5_94: ; %Flow1164
; in Loop: Header=BB5_7 Depth=1
; Restore the lanes that failed the distance test.
s_or_b64 exec, exec, s[22:23]
BB5_95: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 17, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_99
s_cbranch_execz BB5_99
BB5_96: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_98
s_cbranch_execz BB5_98
BB5_97: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 17, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:64
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v37, v30, v72, v37
v_mad_f32 v36, v26, v72, v36
v_mac_f32_e32 v35, v22, v72
BB5_98: ; %Flow1163
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_99: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 18, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_103
s_cbranch_execz BB5_103
BB5_100: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_102
s_cbranch_execz BB5_102
BB5_101: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 18, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:128
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v29, v30, v72, v29
v_mad_f32 v28, v26, v72, v28
v_mac_f32_e32 v27, v22, v72
BB5_102: ; %Flow1162
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_103: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 19, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_107
s_cbranch_execz BB5_107
BB5_104: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_106
s_cbranch_execz BB5_106
BB5_105: ; in Loop: Header=BB5_7 Depth=1
v_lshrrev_b32_e32 v72, 19, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
ds_read_b64 v[74:75], v54 offset:192
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v25, v30, v72, v25
v_mad_f32 v24, v26, v72, v24
v_mac_f32_e32 v23, v22, v72
BB5_106: ; %Flow1161
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_107: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 20, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_111
s_cbranch_execz BB5_111
BB5_108: ; in Loop: Header=BB5_7 Depth=1
; Body of the step guarded by the exec mask set up just above this label
; (lanes switched off there are parked in s[18:19]).
; BB5_108 computes a squared distance in v38 and tests it against v46;
; BB5_109 runs only for lanes that pass and updates v71/v73/v76 and
; v19/v20/v21.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*64 and v53 + 8*65.
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
; s[22:23] = (exec & s[2:3]) | (v59 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_110
s_cbranch_execz BB5_110
BB5_109: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 20 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 20, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 256 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:256
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v19/v20/v21 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v21, v30, v72, v21
v_mad_f32 v20, v26, v72, v20
v_mac_f32_e32 v19, v22, v72
BB5_110: ; %Flow1160
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_111: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 21 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_112/BB5_113 only where bit 21 of v63 is set.
; BB5_113 updates v71/v73/v76 and v15/v16/v17.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 21, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_115
s_cbranch_execz BB5_115
BB5_112: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*80 and v53 + 8*81.
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
; s[22:23] = (exec & s[2:3]) | (v58 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_114
s_cbranch_execz BB5_114
BB5_113: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 21 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 21, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 320 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:320
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v15/v16/v17 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v17, v30, v72, v17
v_mad_f32 v16, v26, v72, v16
v_mac_f32_e32 v15, v22, v72
BB5_114: ; %Flow1159
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_115: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 22 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_116/BB5_117 only where bit 22 of v63 is set.
; BB5_117 updates v71/v73/v76 and v11/v12/v13.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 22, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_119
s_cbranch_execz BB5_119
BB5_116: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*96 and v53 + 8*97.
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
; s[22:23] = (exec & s[2:3]) | (v57 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_118
s_cbranch_execz BB5_118
BB5_117: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 22 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 22, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 384 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:384
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v11/v12/v13 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v13, v30, v72, v13
v_mad_f32 v12, v26, v72, v12
v_mac_f32_e32 v11, v22, v72
BB5_118: ; %Flow1158
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_119: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 23 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_120/BB5_121 only where bit 23 of v63 is set.
; Same computation as the neighbouring steps but with a different register
; allocation: v18 and v65..v70, which are read as inputs here, are also
; overwritten as scratch. BB5_121 updates v71/v73/v76 and v8/v9/v10.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v30 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 23, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_123
s_cbranch_execz BB5_123
BB5_120: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*112 and v53 + 8*113.
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
; s[22:23] = (exec & s[2:3]) | (v33 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v18/v22/v26 = v77-v67, v78-v68, v79-v69; v30 = sum of their squares.
; (v18 is overwritten here after its last use in the compare above.)
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v18, v67, v77
v_mul_f32_e32 v30, v22, v22
; v38 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v38, 0, 1.0, s[22:23]
v_subrev_f32_e32 v26, v69, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
; Keep only lanes with v30 < v38; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_122
s_cbranch_execz BB5_122
BB5_121: ; in Loop: Header=BB5_7 Depth=1
; v69 = 1.0 if bit 23 of v14 is set, else 0. v30 is raised to at least the
; literal 0x34cd15ae, then v46 = rsq(v30). v38 = v70*v80.
v_lshrrev_b32_e32 v46, 23, v14
v_and_b32_e32 v46, 1, v46
v_max_f32_e32 v30, 0x34cd15ae, v30
v_cmp_eq_u32_e32 vcc, 1, v46
v_rsq_f32_e32 v46, v30
v_mul_f32_e32 v38, v70, v80
v_cndmask_b32_e64 v69, 0, 1.0, vcc
s_mov_b32 m0, -1
; v70 = v46^2; v72 = v69 * v70^3.
v_mul_f32_e32 v70, v46, v46
v_mul_f32_e32 v67, v70, v70
v_mul_f32_e32 v67, v69, v67
v_mul_f32_e32 v72, v70, v67
; Read 8 bytes from LDS at v54 + 448 into v[67:68].
ds_read_b64 v[67:68], v54 offset:448
v_mov_b32_e32 v74, 0x3c739487
v_mov_b32_e32 v75, 0x35c55945
; vcc = (s24 > v30); it selects the 1.0/0 factor written to v30 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v30
s_waitcnt lgkmcnt(0)
; v65 = v65*v67 - v72*(v66*v68).
v_mul_f32_e32 v66, v66, v68
v_mul_f32_e32 v66, v72, v66
v_mad_f32 v65, v65, v67, -v66
; With v66 = v47*v30 and v67 = v66^2, evaluate a rational function of v66:
; denominator accumulates in v68, numerator in v67.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v66, v47, v30
v_mul_f32_e32 v67, v66, v66
v_mov_b32_e32 v68, 0x3a92b707
v_madak_f32_e32 v68, v68, v67, 0x3ded3cb2
v_madak_f32_e32 v74, v74, v67, 0x3f01e2bc
v_mad_f32 v68, v68, v67, 1.0
v_mac_f32_e32 v68, v66, v74
v_mov_b32_e32 v74, 0xb2951928
v_madak_f32_e32 v74, v74, v67, 0xb85ffb93
v_madak_f32_e32 v75, v75, v67, 0x3a83ca0c
v_madak_f32_e32 v74, v74, v67, 0xbc9ded90
v_madak_f32_e32 v75, v75, v67, 0x3d8eaf3b
v_madak_f32_e32 v67, v74, v67, 0xbf409397
v_mac_f32_e32 v67, v66, v75
; v66 = v51*v67*rcp(v68) + v46*(v69*v70); v30 = v72 * v70 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v66, v68
v_cndmask_b32_e64 v30, 0, 1.0, vcc
v_mul_f32_e32 v30, v30, v70
v_mul_f32_e32 v30, v72, v30
v_mul_f32_e32 v66, v51, v66
v_mul_f32_e32 v66, v67, v66
v_mul_f32_e32 v67, v69, v70
v_mac_f32_e32 v66, v46, v67
; v46 = v38*v66 - v65*v30 and v30 = v30*v65 - v66*v38
; (the same two products with opposite sign).
v_mul_f32_e32 v46, v65, v30
v_mad_f32 v46, v38, v66, -v46
v_mul_f32_e32 v38, v66, v38
v_mad_f32 v30, v30, v65, -v38
; v71/v73/v76 += v18/v22/v26 * v30; v8/v9/v10 += v18/v22/v26 * v46.
v_mad_f32 v76, v26, v30, v76
v_mad_f32 v73, v22, v30, v73
v_mac_f32_e32 v71, v18, v30
v_mad_f32 v10, v26, v46, v10
v_mad_f32 v9, v22, v46, v9
v_mac_f32_e32 v8, v18, v46
BB5_122: ; %Flow1157
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v30 < v38 test.
s_or_b64 exec, exec, s[22:23]
BB5_123: ; in Loop: Header=BB5_7 Depth=1
; Publish the accumulators v71/v73/v76 to LDS (addresses v5/v6/v7), then, for
; lanes with v2 < 3 only, sum eight LDS floats into v18 and add that sum to a
; global float using a compare-and-swap retry loop (BB5_127).
; NOTE(review): presumably the reduction of the per-lane j-force plus an
; emulated float atomic add -- confirm against the kernel source.
s_or_b64 exec, exec, s[18:19]
s_mov_b32 m0, -1
; vcc = (3 > v2), signed.
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v5, v71
ds_write_b32 v6, v73
ds_write_b32 v7, v76
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; Wait for the three LDS writes issued above before anything reads LDS.
s_waitcnt lgkmcnt(0)
; mask branch BB5_129
s_cbranch_execz BB5_129
BB5_124: ; in Loop: Header=BB5_7 Depth=1
; v26 = s4 + 4*(v0 + 64*v2): LDS byte address of the first summand.
v_lshlrev_b32_e32 v22, 6, v2
v_add_i32_e32 v18, vcc, v0, v22
v_lshlrev_b32_e32 v18, 2, v18
v_add_i32_e32 v26, vcc, s4, v18
s_mov_b32 m0, -1
ds_read_b32 v18, v26
; Enter BB5_125 only where (v0 | 1) < v0 + 8 (signed compare).
v_add_i32_e32 v30, vcc, 8, v0
v_or_b32_e32 v38, 1, v0
v_cmp_lt_i32_e32 vcc, v38, v30
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
s_waitcnt lgkmcnt(0)
; mask branch BB5_126
s_cbranch_execz BB5_126
BB5_125: ; in Loop: Header=BB5_7 Depth=1
; Add seven more LDS dwords to v18: dword offsets 1..4 and 7 relative to v26,
; plus dword offsets 2 and 3 relative to v22 = s4 + 4*((v0 | 3) + 64*v2).
s_mov_b32 m0, -1
ds_read2_b32 v[65:66], v26 offset0:1 offset1:2
v_or_b32_e32 v30, 3, v0
v_add_i32_e32 v22, vcc, v30, v22
v_lshlrev_b32_e32 v22, 2, v22
ds_read2_b32 v[67:68], v26 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v65
v_add_i32_e32 v22, vcc, s4, v22
v_add_f32_e32 v18, v66, v18
ds_read2_b32 v[65:66], v22 offset0:2 offset1:3
ds_read_b32 v26, v26 offset:28
v_add_f32_e32 v18, v67, v18
v_add_f32_e32 v18, v68, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v65, v18
v_add_f32_e32 v18, v66, v18
v_add_f32_e32 v18, v26, v18
BB5_126: ; %._crit_edge.i26
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
; Element index = v64*3 + v2, sign-extended to 64 bits and scaled by 4 bytes
; into v[66:67]. v[64:65] = s[12:13] + that byte offset (full address).
v_mul_lo_i32 v22, v64, 3
v_mov_b32_e32 v26, s13
; s[36:39] = { s[12:13], s[30:31] } is the resource used for the initial load.
s_mov_b64 s[36:37], s[12:13]
s_mov_b64 s[38:39], s[30:31]
v_add_i32_e32 v64, vcc, v22, v2
v_ashrrev_i32_e32 v65, 31, v64
v_lshl_b64 v[66:67], v[64:65], 2
v_add_i32_e32 v64, vcc, s12, v66
v_addc_u32_e32 v65, vcc, v67, v26, vcc
; v67 = current value in memory; it is the first "expected" value for the loop.
buffer_load_dword v67, v[66:67], s[36:39], 0 addr64
; s[22:23] collects the lanes whose compare-and-swap has succeeded.
s_mov_b64 s[22:23], 0
s_waitcnt vmcnt(0)
BB5_127: ; Parent Loop BB5_7 Depth=1
; => This Inner Loop Header: Depth=2
; v68 = new value (v18 + expected), v69 = expected value.
v_add_f32_e32 v66, v18, v67
v_mov_b32_e32 v69, v67
v_mov_b32_e32 v68, v66
; glc is set, so the pre-operation memory value comes back in v68.
buffer_atomic_cmpswap v[68:69], v[64:65], s[28:31], 0 addr64 glc
; NOTE(review): v22 is written twice here and not read in this block.
v_mov_b32_e32 v22, -1
v_mov_b32_e32 v22, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; A lane is done when the returned value equals what it expected; otherwise
; it retries with the returned value as the new expectation.
v_cmp_eq_u32_e32 vcc, v68, v67
s_or_b64 s[22:23], vcc, s[22:23]
v_mov_b32_e32 v67, v68
; Remove finished lanes from exec; loop while any lane remains.
s_andn2_b64 exec, exec, s[22:23]
s_cbranch_execnz BB5_127
; BB#128: ; %Flow1155
; in Loop: Header=BB5_7 Depth=1
; Bring back the lanes that left the retry loop.
s_or_b64 exec, exec, s[22:23]
BB5_129: ; %Flow1156
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes excluded by the v2 < 3 test.
s_or_b64 exec, exec, s[18:19]
BB5_130: ; %Flow1165
; in Loop: Header=BB5_7 Depth=1
; Start of the group that handles bits 24..31 of v63. The whole group (up to
; BB5_170) is skipped for lanes where v63 <= 0xffffff, i.e. none of those bits
; is set. BB5_131 loads the per-group inputs and zeroes v71/v73/v76;
; BB5_132/BB5_133 are the step for bit 24.
; NOTE(review): looks like the per-j-atom part of a nonbonded force kernel
; (v[67:70] = coordinates + charge, v[65:66] = LJ parameters) -- confirm.
s_or_b64 exec, exec, s[14:15]
v_mov_b32_e32 v18, 0xffffff
; vcc = (0xffffff < v63), unsigned.
v_cmp_lt_u32_e32 vcc, v18, v63
s_and_saveexec_b64 s[14:15], vcc
s_xor_b64 s[14:15], exec, s[14:15]
; mask branch BB5_170
s_cbranch_execz BB5_170
BB5_131: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v18 = LDS dword at v52 + 12; element index v64 = v18*8 + v1.
ds_read_b32 v18, v52 offset:12
s_mov_b64 s[36:37], s[16:17]
s_mov_b64 s[38:39], s[30:31]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v22, 3, v18
v_add_i32_e32 v64, vcc, v22, v1
v_ashrrev_i32_e32 v65, 31, v64
; v[67:70] = 16 bytes at byte offset index*16 from base s[16:17].
v_lshl_b64 v[66:67], v[64:65], 4
buffer_load_dwordx4 v[67:70], v[66:67], s[36:39], 0 addr64
; v[65:66] = 8 bytes at byte offset index*8 from base s[20:21].
v_lshl_b64 v[71:72], v[64:65], 3
s_mov_b64 s[36:37], s[20:21]
buffer_load_dwordx2 v[65:66], v[71:72], s[36:39], 0 addr64
; Test bit 24 of v63 and clear the accumulators v71/v73/v76.
v_lshrrev_b32_e32 v22, 24, v63
v_mov_b32_e32 v71, 0
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; Both global loads must have landed before the steps below use them.
s_waitcnt vmcnt(0)
; mask branch BB5_135
s_cbranch_execz BB5_135
BB5_132: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 0 and v53 + 8.
ds_read2_b64 v[77:80], v53 offset1:1
; s[22:23] = (exec & s[2:3]) | (v40 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v40, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v26/v22/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v26, v67, v77
v_mul_f32_e32 v38, v22, v22
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v26, v26
; v71/v73/v76 are cleared again here (they already hold 0 from BB5_131).
v_mov_b32_e32 v71, 0
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
v_mov_b32_e32 v73, v71
v_mov_b32_e32 v76, v71
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_134
s_cbranch_execz BB5_134
BB5_133: ; in Loop: Header=BB5_7 Depth=1
; v38 is raised to at least the literal 0x34cd15ae, then v74 = rsq(v38).
; v73 = 1.0 if bit 24 of v14 is set, else 0. v78 = v74^2; v79 = v73 * v78^3.
v_max_f32_e32 v38, 0x34cd15ae, v38
v_rsq_f32_e32 v74, v38
v_lshrrev_b32_e32 v71, 24, v14
v_and_b32_e32 v71, 1, v71
v_cmp_eq_u32_e32 vcc, 1, v71
v_mul_f32_e32 v78, v74, v74
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v71, v78, v78
v_mul_f32_e32 v71, v73, v71
s_mov_b32 m0, -1
v_mul_f32_e32 v79, v78, v71
; Read 8 bytes from LDS at v54 into v[71:72].
ds_read_b64 v[71:72], v54
; v75 = v47*v38, v76 = v75^2 (inputs of the rational function below);
; v46 = v70*v80.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v76, v75, v75
v_mov_b32_e32 v77, 0x3a92b707
v_mul_f32_e32 v46, v70, v80
s_waitcnt lgkmcnt(0)
; v71 = v65*v71 - v79*(v66*v72).
v_mul_f32_e32 v72, v66, v72
v_mul_f32_e32 v72, v79, v72
v_mad_f32 v71, v65, v71, -v72
; Rational function of v75: denominator accumulates in v72 (replaced by its
; reciprocal at the v_rcp), numerator in v76.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_madak_f32_e32 v72, v77, v76, 0x3ded3cb2
v_mov_b32_e32 v77, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3f01e2bc
v_mad_f32 v72, v72, v76, 1.0
v_mac_f32_e32 v72, v75, v77
v_mov_b32_e32 v77, 0xb2951928
v_rcp_f32_e32 v72, v72
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_mov_b32_e32 v80, 0x35c55945
v_madak_f32_e32 v80, v80, v76, 0x3a83ca0c
; vcc = (s24 > v38) selects the 1.0/0 factor that replaces v38 just below.
v_cmp_gt_f32_e32 vcc, s24, v38
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_madak_f32_e32 v80, v80, v76, 0x3d8eaf3b
v_madak_f32_e32 v76, v77, v76, 0xbf409397
v_mul_f32_e32 v38, v38, v78
v_mac_f32_e32 v76, v75, v80
; v72 = v51*v76*rcp(denominator) + v74*(v73*v78); v38 = v79 * v78 * (1.0 or 0).
v_mul_f32_e32 v72, v51, v72
v_mul_f32_e32 v72, v76, v72
v_mul_f32_e32 v73, v73, v78
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v72, v74, v73
; v74 = v46*v72 - v71*v38 and v38 = v38*v71 - v72*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v73, v71, v38
v_mad_f32 v74, v46, v72, -v73
v_mul_f32_e32 v46, v72, v46
v_mad_f32 v38, v38, v71, -v46
; v43/v44/v45 += v38 * -(v26/v22/v30).
v_mad_f32 v45, v38, -v30, v45
v_mad_f32 v44, v38, -v22, v44
v_mad_f32 v43, v38, -v26, v43
; v71/v73/v76 are assigned (not accumulated): v74 * -(v26/v22/v30).
v_mul_f32_e64 v76, v74, -v30
v_mul_f32_e64 v73, v74, -v22
v_mul_f32_e64 v71, v74, -v26
BB5_134: ; %Flow1153
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_135: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 25 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_136/BB5_137 only where bit 25 of v63 is set.
; BB5_137 updates v71/v73/v76 and v35/v36/v37.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 25, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_139
s_cbranch_execz BB5_139
BB5_136: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*16 and v53 + 8*17.
ds_read2_b64 v[77:80], v53 offset0:16 offset1:17
; s[22:23] = (exec & s[2:3]) | (v62 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v62, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_138
s_cbranch_execz BB5_138
BB5_137: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 25 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 25, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 64 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:64
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v35/v36/v37 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v37, v30, v72, v37
v_mad_f32 v36, v26, v72, v36
v_mac_f32_e32 v35, v22, v72
BB5_138: ; %Flow1152
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_139: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 26 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_140/BB5_141 only where bit 26 of v63 is set.
; BB5_141 updates v71/v73/v76 and v27/v28/v29.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 26, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_143
s_cbranch_execz BB5_143
BB5_140: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*32 and v53 + 8*33.
ds_read2_b64 v[77:80], v53 offset0:32 offset1:33
; s[22:23] = (exec & s[2:3]) | (v61 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v61, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_142
s_cbranch_execz BB5_142
BB5_141: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 26 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 26, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 128 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:128
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v27/v28/v29 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v29, v30, v72, v29
v_mad_f32 v28, v26, v72, v28
v_mac_f32_e32 v27, v22, v72
BB5_142: ; %Flow1151
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_143: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 27 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_144/BB5_145 only where bit 27 of v63 is set.
; BB5_145 updates v71/v73/v76 and v23/v24/v25.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 27, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_147
s_cbranch_execz BB5_147
BB5_144: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*48 and v53 + 8*49.
ds_read2_b64 v[77:80], v53 offset0:48 offset1:49
; s[22:23] = (exec & s[2:3]) | (v60 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v60, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_146
s_cbranch_execz BB5_146
BB5_145: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 27 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 27, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 192 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:192
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v23/v24/v25 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v25, v30, v72, v25
v_mad_f32 v24, v26, v72, v24
v_mac_f32_e32 v23, v22, v72
BB5_146: ; %Flow1150
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_147: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 28 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_148/BB5_149 only where bit 28 of v63 is set.
; BB5_149 updates v71/v73/v76 and v19/v20/v21.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 28, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_151
s_cbranch_execz BB5_151
BB5_148: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*64 and v53 + 8*65.
ds_read2_b64 v[77:80], v53 offset0:64 offset1:65
; s[22:23] = (exec & s[2:3]) | (v59 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v59, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_150
s_cbranch_execz BB5_150
BB5_149: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 28 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 28, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 256 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:256
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v19/v20/v21 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v21, v30, v72, v21
v_mad_f32 v20, v26, v72, v20
v_mac_f32_e32 v19, v22, v72
BB5_150: ; %Flow1149
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_151: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 29 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_152/BB5_153 only where bit 29 of v63 is set.
; BB5_153 updates v71/v73/v76 and v15/v16/v17.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 29, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_155
s_cbranch_execz BB5_155
BB5_152: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*80 and v53 + 8*81.
ds_read2_b64 v[77:80], v53 offset0:80 offset1:81
; s[22:23] = (exec & s[2:3]) | (v58 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v58, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_154
s_cbranch_execz BB5_154
BB5_153: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 29 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 29, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 320 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:320
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v15/v16/v17 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v17, v30, v72, v17
v_mad_f32 v16, v26, v72, v16
v_mac_f32_e32 v15, v22, v72
BB5_154: ; %Flow1148
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_155: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 30 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_156/BB5_157 only where bit 30 of v63 is set.
; BB5_157 updates v71/v73/v76 and v11/v12/v13.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v38 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
v_lshrrev_b32_e32 v22, 30, v63
v_and_b32_e32 v22, 1, v22
v_cmp_eq_u32_e32 vcc, 1, v22
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_159
s_cbranch_execz BB5_159
BB5_156: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*96 and v53 + 8*97.
ds_read2_b64 v[77:80], v53 offset0:96 offset1:97
; s[22:23] = (exec & s[2:3]) | (v57 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v57, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v22/v26/v30 = v77-v67, v78-v68, v79-v69; v38 = sum of their squares.
v_subrev_f32_e32 v26, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v22, v67, v77
v_mul_f32_e32 v38, v26, v26
; v46 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v46, 0, 1.0, s[22:23]
v_subrev_f32_e32 v30, v69, v79
v_mac_f32_e32 v38, v22, v22
v_mac_f32_e32 v38, v30, v30
v_mul_f32_e32 v46, s5, v46
; Keep only lanes with v38 < v46; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v38, v46
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_158
s_cbranch_execz BB5_158
BB5_157: ; in Loop: Header=BB5_7 Depth=1
; v77 = 1.0 if bit 30 of v14 is set, else 0. v38 is raised to at least the
; literal 0x34cd15ae, then v72 = rsq(v38).
v_lshrrev_b32_e32 v72, 30, v14
v_and_b32_e32 v72, 1, v72
v_max_f32_e32 v38, 0x34cd15ae, v38
v_cmp_eq_u32_e32 vcc, 1, v72
v_rsq_f32_e32 v72, v38
v_cndmask_b32_e64 v77, 0, 1.0, vcc
s_mov_b32 m0, -1
; v46 = v70*v80; v78 = v72^2; v79 = v77 * v78^3.
v_mul_f32_e32 v46, v70, v80
v_mul_f32_e32 v78, v72, v72
v_mul_f32_e32 v74, v78, v78
v_mul_f32_e32 v74, v77, v74
v_mul_f32_e32 v79, v78, v74
; Read 8 bytes from LDS at v54 + 384 into v[74:75]; the polynomial seed
; coefficients are loaded while the read is in flight.
ds_read_b64 v[74:75], v54 offset:384
v_mov_b32_e32 v81, 0x3a92b707
v_mov_b32_e32 v82, 0x3c739487
v_mov_b32_e32 v83, 0x35c55945
; vcc = (s24 > v38); it selects the 1.0/0 factor written to v38 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v38
s_waitcnt lgkmcnt(0)
; v74 = v65*v74 - v79*(v66*v75).
v_mul_f32_e32 v75, v66, v75
v_mul_f32_e32 v75, v79, v75
v_mad_f32 v74, v65, v74, -v75
; With v75 = v47*v38 and v80 = v75^2, evaluate a rational function of v75:
; denominator accumulates in v81, numerator in v80.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v75, v47, v38
v_mul_f32_e32 v80, v75, v75
v_madak_f32_e32 v81, v81, v80, 0x3ded3cb2
v_madak_f32_e32 v82, v82, v80, 0x3f01e2bc
v_mad_f32 v81, v81, v80, 1.0
v_mac_f32_e32 v81, v75, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v80, 0xb85ffb93
v_madak_f32_e32 v83, v83, v80, 0x3a83ca0c
v_madak_f32_e32 v82, v82, v80, 0xbc9ded90
v_madak_f32_e32 v83, v83, v80, 0x3d8eaf3b
v_madak_f32_e32 v80, v82, v80, 0xbf409397
v_mac_f32_e32 v80, v75, v83
; v75 = v51*v80*rcp(v81) + v72*(v77*v78); v38 = v79 * v78 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v75, v81
v_cndmask_b32_e64 v38, 0, 1.0, vcc
v_mul_f32_e32 v38, v38, v78
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v75, v51, v75
v_mul_f32_e32 v75, v80, v75
v_mul_f32_e32 v38, v79, v38
v_mac_f32_e32 v75, v72, v77
; v72 = v46*v75 - v74*v38 and v38 = v38*v74 - v75*v46
; (the same two products with opposite sign).
v_mul_f32_e32 v72, v74, v38
v_mad_f32 v72, v46, v75, -v72
v_mul_f32_e32 v46, v75, v46
v_mad_f32 v38, v38, v74, -v46
; v71/v73/v76 += v22/v26/v30 * v38; v11/v12/v13 += v22/v26/v30 * v72.
v_mad_f32 v76, v30, v38, v76
v_mad_f32 v73, v26, v38, v73
v_mac_f32_e32 v71, v22, v38
v_mad_f32 v13, v30, v72, v13
v_mad_f32 v12, v26, v72, v12
v_mac_f32_e32 v11, v22, v72
BB5_158: ; %Flow1147
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v38 < v46 test.
s_or_b64 exec, exec, s[22:23]
BB5_159: ; in Loop: Header=BB5_7 Depth=1
; Step for bit 31 of v63: re-enable the lanes parked in s[18:19] by the
; previous step, then run BB5_160/BB5_161 only where v63 is negative as a
; signed integer (sign bit set). Same computation as the neighbouring steps
; with a different register allocation: v14, v18, v63 and v65..v70, which are
; read as inputs here, are also overwritten as scratch.
; BB5_161 updates v71/v73/v76 and v8/v9/v10.
; NOTE(review): looks like one pair interaction of a nonbonded force kernel
; (v30 = squared distance, s5 = squared cutoff) -- confirm against the source.
s_or_b64 exec, exec, s[18:19]
; vcc = (0 > v63), signed.
v_cmp_gt_i32_e32 vcc, 0, v63
; exec &= vcc; s[18:19] ends up holding the lanes switched off here.
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
; mask branch BB5_163
s_cbranch_execz BB5_163
BB5_160: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
; v[77:80] = 16 bytes from LDS at v53 + 8*112 and v53 + 8*113.
ds_read2_b64 v[77:80], v53 offset0:112 offset1:113
; s[22:23] = (exec & s[2:3]) | (v33 != v18) | (exec & s[0:1]).
v_cmp_ne_u32_e32 vcc, v33, v18
s_and_b64 s[22:23], exec, s[2:3]
s_or_b64 s[22:23], s[22:23], vcc
s_and_b64 s[36:37], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; v18/v22/v26 = v77-v67, v78-v68, v79-v69; v30 = sum of their squares.
; (v18 is overwritten here after its last use in the compare above.)
v_subrev_f32_e32 v22, v68, v78
s_or_b64 s[22:23], s[36:37], s[22:23]
v_subrev_f32_e32 v18, v67, v77
v_mul_f32_e32 v30, v22, v22
; v38 = s5 where s[22:23] is set for the lane, else 0.
v_cndmask_b32_e64 v38, 0, 1.0, s[22:23]
v_subrev_f32_e32 v26, v69, v79
v_mac_f32_e32 v30, v18, v18
v_mac_f32_e32 v30, v26, v26
v_mul_f32_e32 v38, s5, v38
; Keep only lanes with v30 < v38; the lanes dropped here end up in s[22:23].
v_cmp_lt_f32_e32 vcc, v30, v38
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
; mask branch BB5_162
s_cbranch_execz BB5_162
BB5_161: ; in Loop: Header=BB5_7 Depth=1
; v46 = 1.0 if v14 is negative (sign bit set), else 0. v14 is then reused for
; max(v30, literal 0x34cd15ae), and v30 becomes rsq(v14).
v_cmp_gt_i32_e32 vcc, 0, v14
v_max_f32_e32 v14, 0x34cd15ae, v30
v_rsq_f32_e32 v30, v14
v_cndmask_b32_e64 v46, 0, 1.0, vcc
s_mov_b32 m0, -1
; v38 = v70*v80; v63 = v30^2; v69 = v46 * v63^3.
v_mul_f32_e32 v38, v70, v80
v_mul_f32_e32 v63, v30, v30
v_mul_f32_e32 v67, v63, v63
v_mul_f32_e32 v67, v46, v67
v_mul_f32_e32 v69, v63, v67
; Read 8 bytes from LDS at v54 + 448 into v[67:68].
ds_read_b64 v[67:68], v54 offset:448
v_mov_b32_e32 v70, 0x3c739487
v_mov_b32_e32 v72, 0x35c55945
; vcc = (s24 > v14); it selects the 1.0/0 factor written to v14 after v_rcp.
v_cmp_gt_f32_e32 vcc, s24, v14
; v46 = v46 * v63.
v_mul_f32_e32 v46, v46, v63
s_waitcnt lgkmcnt(0)
; v65 = v65*v67 - v69*(v66*v68).
v_mul_f32_e32 v66, v66, v68
v_mul_f32_e32 v66, v69, v66
v_mad_f32 v65, v65, v67, -v66
; With v66 = v47*v14 and v67 = v66^2, evaluate a rational function of v66:
; denominator accumulates in v68, numerator in v67.
; NOTE(review): presumably an analytical Ewald correction fit -- confirm.
v_mul_f32_e32 v66, v47, v14
v_mul_f32_e32 v67, v66, v66
v_mov_b32_e32 v68, 0x3a92b707
v_madak_f32_e32 v68, v68, v67, 0x3ded3cb2
v_madak_f32_e32 v70, v70, v67, 0x3f01e2bc
v_mad_f32 v68, v68, v67, 1.0
v_mac_f32_e32 v68, v66, v70
v_mov_b32_e32 v70, 0xb2951928
v_madak_f32_e32 v70, v70, v67, 0xb85ffb93
v_madak_f32_e32 v72, v72, v67, 0x3a83ca0c
v_madak_f32_e32 v70, v70, v67, 0xbc9ded90
v_madak_f32_e32 v72, v72, v67, 0x3d8eaf3b
v_madak_f32_e32 v67, v70, v67, 0xbf409397
v_mac_f32_e32 v67, v66, v72
; v66 = v51*v67*rcp(v68) + v30*v46; v14 = v69 * v63 * (1.0 or 0 from vcc).
v_rcp_f32_e32 v66, v68
v_cndmask_b32_e64 v14, 0, 1.0, vcc
v_mul_f32_e32 v14, v14, v63
v_mul_f32_e32 v14, v69, v14
v_mul_f32_e32 v66, v51, v66
v_mul_f32_e32 v66, v67, v66
v_mac_f32_e32 v66, v30, v46
; v30 = v38*v66 - v65*v14 and v14 = v14*v65 - v66*v38
; (the same two products with opposite sign).
v_mul_f32_e32 v30, v65, v14
v_mad_f32 v30, v38, v66, -v30
v_mul_f32_e32 v38, v66, v38
v_mad_f32 v14, v14, v65, -v38
; v71/v73/v76 += v18/v22/v26 * v14; v8/v9/v10 += v18/v22/v26 * v30.
v_mad_f32 v76, v26, v14, v76
v_mad_f32 v73, v22, v14, v73
v_mac_f32_e32 v71, v18, v14
v_mad_f32 v10, v26, v30, v10
v_mad_f32 v9, v22, v30, v9
v_mac_f32_e32 v8, v18, v30
BB5_162: ; %Flow1146
; in Loop: Header=BB5_7 Depth=1
; Re-enable the lanes dropped by the v30 < v38 test.
s_or_b64 exec, exec, s[22:23]
BB5_163: ; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
s_mov_b32 m0, -1
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v5, v71
ds_write_b32 v6, v73
ds_write_b32 v7, v76
s_and_saveexec_b64 s[18:19], vcc
s_xor_b64 s[18:19], exec, s[18:19]
s_waitcnt lgkmcnt(0)
; mask branch BB5_169
s_cbranch_execz BB5_169
BB5_164: ; in Loop: Header=BB5_7 Depth=1
v_lshlrev_b32_e32 v18, 6, v2
v_add_i32_e32 v14, vcc, v0, v18
v_lshlrev_b32_e32 v14, 2, v14
v_add_i32_e32 v22, vcc, s4, v14
s_mov_b32 m0, -1
ds_read_b32 v14, v22
v_add_i32_e32 v26, vcc, 8, v0
v_or_b32_e32 v30, 1, v0
v_cmp_lt_i32_e32 vcc, v30, v26
s_and_saveexec_b64 s[22:23], vcc
s_xor_b64 s[22:23], exec, s[22:23]
s_waitcnt lgkmcnt(0)
; mask branch BB5_166
s_cbranch_execz BB5_166
BB5_165: ; in Loop: Header=BB5_7 Depth=1
s_mov_b32 m0, -1
ds_read2_b32 v[65:66], v22 offset0:1 offset1:2
v_or_b32_e32 v26, 3, v0
v_add_i32_e32 v18, vcc, v26, v18
v_lshlrev_b32_e32 v18, 2, v18
ds_read2_b32 v[67:68], v22 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v14, v14, v65
v_add_i32_e32 v18, vcc, s4, v18
v_add_f32_e32 v14, v66, v14
ds_read2_b32 v[65:66], v18 offset0:2 offset1:3
ds_read_b32 v22, v22 offset:28
v_add_f32_e32 v14, v67, v14
v_add_f32_e32 v14, v68, v14
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v14, v65, v14
v_add_f32_e32 v14, v66, v14
v_add_f32_e32 v14, v22, v14
BB5_166: ; %._crit_edge.i
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
v_mul_lo_i32 v18, v64, 3
v_mov_b32_e32 v22, s13
s_mov_b64 s[36:37], s[12:13]
s_mov_b64 s[38:39], s[30:31]
v_add_i32_e32 v63, vcc, v18, v2
v_ashrrev_i32_e32 v64, 31, v63
v_lshl_b64 v[65:66], v[63:64], 2
v_add_i32_e32 v63, vcc, s12, v65
v_addc_u32_e32 v64, vcc, v66, v22, vcc
buffer_load_dword v66, v[65:66], s[36:39], 0 addr64
s_mov_b64 s[22:23], 0
s_waitcnt vmcnt(0)
BB5_167: ; Parent Loop BB5_7 Depth=1
; => This Inner Loop Header: Depth=2
v_add_f32_e32 v65, v14, v66
v_mov_b32_e32 v68, v66
v_mov_b32_e32 v67, v65
buffer_atomic_cmpswap v[67:68], v[63:64], s[28:31], 0 addr64 glc
v_mov_b32_e32 v18, -1
v_mov_b32_e32 v18, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v67, v66
s_or_b64 s[22:23], vcc, s[22:23]
v_mov_b32_e32 v66, v67
s_andn2_b64 exec, exec, s[22:23]
s_cbranch_execnz BB5_167
; BB#168: ; %Flow1144
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[22:23]
BB5_169: ; %Flow1145
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[18:19]
BB5_170: ; %Flow1154
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[14:15]
BB5_171: ; %Flow1188
; in Loop: Header=BB5_7 Depth=1
s_or_b64 exec, exec, s[34:35]
v_add_i32_e32 v55, vcc, 1, v55
v_addc_u32_e32 v56, vcc, 0, v56, vcc
v_cmp_ne_u32_e32 vcc, v55, v34
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_7
BB5_172: ; %Flow1191
s_mov_b32 m0, -1
ds_write_b32 v5, v43
ds_write_b32 v6, v44
ds_write_b32 v7, v45
s_waitcnt lgkmcnt(0)
s_barrier
s_load_dword s0, s[6:7], 0x32
v_cmp_ne_u32_e32 vcc, 22, v32
v_lshlrev_b32_e32 v18, 2, v39
v_mov_b32_e32 v3, 0
v_lshlrev_b32_e32 v14, 6, v31
s_waitcnt lgkmcnt(0)
v_cmp_ne_u32_e64 s[0:1], s0, 0
s_and_b64 s[2:3], s[0:1], vcc
v_add_i32_e32 v18, vcc, s4, v18
v_add_i32_e32 v26, vcc, 64, v2
v_add_i32_e32 v22, vcc, 0x80, v2
v_cmp_gt_i32_e64 s[0:1], 4, v1
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_183
s_cbranch_execz BB5_183
BB5_173:
s_mov_b32 m0, -1
ds_read_b32 v3, v18 offset:128
ds_read_b32 v30, v18
v_add_i32_e32 v31, vcc, v0, v26
v_lshlrev_b32_e32 v31, 2, v31
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v30
ds_write_b32 v18, v3
v_add_i32_e32 v30, vcc, s4, v31
s_waitcnt lgkmcnt(0)
ds_read_b32 v3, v30 offset:128
ds_read_b32 v31, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v31
ds_write_b32 v18, v3 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v3, vcc, v0, v22
v_lshlrev_b32_e32 v3, 2, v3
v_add_i32_e32 v31, vcc, s4, v3
ds_read_b32 v3, v31 offset:128
ds_read_b32 v32, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v32
v_mov_b32_e32 v32, 0
ds_write_b32 v18, v3 offset:512
s_waitcnt lgkmcnt(0)
; implicit-def: %VGPR3
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_175
BB5_174:
v_cmp_eq_u32_e32 vcc, 2, v1
v_mov_b32_e32 v3, 0
v_cndmask_b32_e64 v32, 0, -1, vcc
BB5_175: ; %Flow1141
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_177
s_cbranch_execz BB5_177
BB5_176: ; %.thread85.i
s_mov_b32 m0, -1
ds_read_b32 v32, v18 offset:64
ds_read_b32 v33, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v18, v32
ds_read_b32 v30, v30 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v32, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v32
ds_write_b32 v18, v30 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v30, v31 offset:64
ds_read_b32 v31, v18 offset:512
v_mov_b32_e32 v32, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v31
ds_write_b32 v18, v30 offset:512
s_waitcnt lgkmcnt(0)
BB5_177: ; %Flow1142
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v32
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_182
s_cbranch_execz BB5_182
BB5_178:
v_mov_b32_e32 v30, 0xe0
v_mad_i32_i24 v30, v30, v1, v18
s_mov_b32 m0, -1
v_add_i32_e32 v3, vcc, v14, v2
ds_read_b32 v31, v30
ds_read_b32 v30, v30 offset:32
v_mul_lo_i32 v3, v3, 3
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_mov_b64 s[16:17], s[12:13]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v31, v30
v_add_i32_e32 v31, vcc, v1, v3
v_ashrrev_i32_e32 v32, 31, v31
v_lshl_b64 v[33:34], v[31:32], 2
v_add_i32_e32 v31, vcc, s12, v33
v_mov_b32_e32 v3, s13
v_addc_u32_e32 v32, vcc, v34, v3, vcc
buffer_load_dword v34, v[33:34], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_179: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v33, v30, v34
v_mov_b32_e32 v39, v34
v_mov_b32_e32 v38, v33
buffer_atomic_cmpswap v[38:39], v[31:32], s[16:19], 0 addr64 glc
v_mov_b32_e32 v3, -1
v_mov_b32_e32 v3, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v38, v34
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v34, v38
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_179
; BB#180: ; %atomicAdd_g_f.exit.i
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v31
v_mov_b32_e32 v3, 0
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_182
; BB#181:
v_mov_b32_e32 v3, v30
BB5_182: ; %Flow1143
s_or_b64 exec, exec, s[10:11]
BB5_183: ; %reduce_force_i_pow2.exit
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v35
ds_write_b32 v6, v36
ds_write_b32 v7, v37
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_194
s_cbranch_execz BB5_194
BB5_184:
s_mov_b32 m0, -1
ds_read_b32 v30, v18 offset:128
ds_read_b32 v31, v18
v_add_i32_e32 v32, vcc, v0, v26
v_lshlrev_b32_e32 v32, 2, v32
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v31
ds_write_b32 v18, v30
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v30, vcc, s4, v32
ds_read_b32 v31, v30 offset:128
ds_read_b32 v32, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v31, v31, v32
ds_write_b32 v18, v31 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v31, vcc, v0, v22
v_lshlrev_b32_e32 v31, 2, v31
v_add_i32_e32 v31, vcc, s4, v31
ds_read_b32 v32, v31 offset:128
ds_read_b32 v33, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v18, v32 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v32, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_186
BB5_185:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v32, 0, -1, vcc
BB5_186: ; %Flow1138
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_188
s_cbranch_execz BB5_188
BB5_187: ; %.thread85.i491
s_mov_b32 m0, -1
ds_read_b32 v32, v18 offset:64
ds_read_b32 v33, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v18, v32
ds_read_b32 v30, v30 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v32, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v32
ds_write_b32 v18, v30 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v30, v31 offset:64
ds_read_b32 v31, v18 offset:512
v_mov_b32_e32 v32, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v30, v31
ds_write_b32 v18, v30 offset:512
s_waitcnt lgkmcnt(0)
BB5_188: ; %Flow1139
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v32
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_193
s_cbranch_execz BB5_193
BB5_189:
v_or_b32_e32 v30, 8, v14
v_add_i32_e32 v30, vcc, v30, v2
v_mul_lo_i32 v31, v30, 3
v_mov_b32_e32 v30, 0xe0
v_mad_i32_i24 v30, v30, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v32, v30
ds_read_b32 v30, v30 offset:32
v_add_i32_e32 v31, vcc, v1, v31
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_mov_b64 s[16:17], s[12:13]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v30, v32, v30
v_ashrrev_i32_e32 v32, 31, v31
v_lshl_b64 v[33:34], v[31:32], 2
v_add_i32_e32 v31, vcc, s12, v33
v_mov_b32_e32 v32, s13
v_addc_u32_e32 v32, vcc, v34, v32, vcc
buffer_load_dword v34, v[33:34], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_190: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v33, -1
v_add_f32_e32 v33, v30, v34
v_mov_b32_e32 v36, v34
v_mov_b32_e32 v35, v33
buffer_atomic_cmpswap v[35:36], v[31:32], s[16:19], 0 addr64 glc
v_mov_b32_e32 v33, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v35, v34
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v34, v35
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_190
; BB#191: ; %atomicAdd_g_f.exit.i479
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v31
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_193
; BB#192:
v_add_f32_e32 v3, v30, v3
BB5_193: ; %Flow1140
s_or_b64 exec, exec, s[10:11]
BB5_194: ; %reduce_force_i_pow2.exit493
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v27
ds_write_b32 v6, v28
ds_write_b32 v7, v29
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_205
s_cbranch_execz BB5_205
BB5_195:
s_mov_b32 m0, -1
ds_read_b32 v27, v18 offset:128
ds_read_b32 v28, v18
v_add_i32_e32 v29, vcc, v0, v26
v_lshlrev_b32_e32 v29, 2, v29
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v28
ds_write_b32 v18, v27
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v27, vcc, s4, v29
ds_read_b32 v28, v27 offset:128
ds_read_b32 v29, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v28, v28, v29
ds_write_b32 v18, v28 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v28, vcc, v0, v22
v_lshlrev_b32_e32 v28, 2, v28
v_add_i32_e32 v28, vcc, s4, v28
ds_read_b32 v29, v28 offset:128
ds_read_b32 v30, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v29, v29, v30
ds_write_b32 v18, v29 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v29, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_197
BB5_196:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v29, 0, -1, vcc
BB5_197: ; %Flow1135
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_199
s_cbranch_execz BB5_199
BB5_198: ; %.thread85.i442
s_mov_b32 m0, -1
ds_read_b32 v29, v18 offset:64
ds_read_b32 v30, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v29, v29, v30
ds_write_b32 v18, v29
ds_read_b32 v27, v27 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v29, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v29
ds_write_b32 v18, v27 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v27, v28 offset:64
ds_read_b32 v28, v18 offset:512
v_mov_b32_e32 v29, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v28
ds_write_b32 v18, v27 offset:512
s_waitcnt lgkmcnt(0)
BB5_199: ; %Flow1136
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v29
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_204
s_cbranch_execz BB5_204
BB5_200:
v_or_b32_e32 v27, 16, v14
v_add_i32_e32 v27, vcc, v27, v2
v_mul_lo_i32 v28, v27, 3
v_mov_b32_e32 v27, 0xe0
v_mad_i32_i24 v27, v27, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v29, v27
ds_read_b32 v27, v27 offset:32
v_add_i32_e32 v28, vcc, v1, v28
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_mov_b64 s[16:17], s[12:13]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v29, v27
v_ashrrev_i32_e32 v29, 31, v28
v_lshl_b64 v[30:31], v[28:29], 2
v_add_i32_e32 v28, vcc, s12, v30
v_mov_b32_e32 v29, s13
v_addc_u32_e32 v29, vcc, v31, v29, vcc
buffer_load_dword v31, v[30:31], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_201: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v30, -1
v_add_f32_e32 v30, v27, v31
v_mov_b32_e32 v33, v31
v_mov_b32_e32 v32, v30
buffer_atomic_cmpswap v[32:33], v[28:29], s[16:19], 0 addr64 glc
v_mov_b32_e32 v30, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v32, v31
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v31, v32
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_201
; BB#202: ; %atomicAdd_g_f.exit.i430
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v28, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v28
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_204
; BB#203:
v_add_f32_e32 v3, v27, v3
BB5_204: ; %Flow1137
s_or_b64 exec, exec, s[10:11]
BB5_205: ; %reduce_force_i_pow2.exit444
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v23
ds_write_b32 v6, v24
ds_write_b32 v7, v25
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_216
s_cbranch_execz BB5_216
BB5_206:
s_mov_b32 m0, -1
ds_read_b32 v23, v18 offset:128
ds_read_b32 v24, v18
v_add_i32_e32 v25, vcc, v0, v26
v_lshlrev_b32_e32 v25, 2, v25
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v24
ds_write_b32 v18, v23
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v23, vcc, s4, v25
ds_read_b32 v24, v23 offset:128
ds_read_b32 v25, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v24, v24, v25
ds_write_b32 v18, v24 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v24, vcc, v0, v22
v_lshlrev_b32_e32 v24, 2, v24
v_add_i32_e32 v24, vcc, s4, v24
ds_read_b32 v25, v24 offset:128
ds_read_b32 v27, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v25, v25, v27
ds_write_b32 v18, v25 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v25, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_208
BB5_207:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v25, 0, -1, vcc
BB5_208: ; %Flow1132
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_210
s_cbranch_execz BB5_210
BB5_209: ; %.thread85.i393
s_mov_b32 m0, -1
ds_read_b32 v25, v18 offset:64
ds_read_b32 v27, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v25, v25, v27
ds_write_b32 v18, v25
ds_read_b32 v23, v23 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v25, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v25
ds_write_b32 v18, v23 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v23, v24 offset:64
ds_read_b32 v24, v18 offset:512
v_mov_b32_e32 v25, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v24
ds_write_b32 v18, v23 offset:512
s_waitcnt lgkmcnt(0)
BB5_210: ; %Flow1133
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v25
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_215
s_cbranch_execz BB5_215
BB5_211:
v_or_b32_e32 v23, 24, v14
v_add_i32_e32 v23, vcc, v23, v2
v_mul_lo_i32 v24, v23, 3
v_mov_b32_e32 v23, 0xe0
v_mad_i32_i24 v23, v23, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v25, v23
ds_read_b32 v23, v23 offset:32
v_add_i32_e32 v24, vcc, v1, v24
v_mov_b32_e32 v28, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v25, v23
v_ashrrev_i32_e32 v25, 31, v24
v_lshl_b64 v[24:25], v[24:25], 2
v_add_i32_e32 v27, vcc, s12, v24
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v25, v28, vcc
buffer_load_dword v25, v[24:25], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_212: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v24, -1
v_add_f32_e32 v24, v23, v25
v_mov_b32_e32 v30, v25
v_mov_b32_e32 v29, v24
buffer_atomic_cmpswap v[29:30], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v24, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v29, v25
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v25, v29
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_212
; BB#213: ; %atomicAdd_g_f.exit.i381
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v24, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v24
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_215
; BB#214:
v_add_f32_e32 v3, v23, v3
BB5_215: ; %Flow1134
s_or_b64 exec, exec, s[10:11]
BB5_216: ; %reduce_force_i_pow2.exit395
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v19
ds_write_b32 v6, v20
ds_write_b32 v7, v21
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_227
s_cbranch_execz BB5_227
BB5_217:
s_mov_b32 m0, -1
ds_read_b32 v19, v18 offset:128
ds_read_b32 v20, v18
v_add_i32_e32 v21, vcc, v0, v26
v_lshlrev_b32_e32 v21, 2, v21
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v20
ds_write_b32 v18, v19
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v19, vcc, s4, v21
ds_read_b32 v20, v19 offset:128
ds_read_b32 v21, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v20, v20, v21
ds_write_b32 v18, v20 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v20, vcc, v0, v22
v_lshlrev_b32_e32 v20, 2, v20
v_add_i32_e32 v20, vcc, s4, v20
ds_read_b32 v21, v20 offset:128
ds_read_b32 v23, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v21, v21, v23
ds_write_b32 v18, v21 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v21, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_219
BB5_218:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v21, 0, -1, vcc
BB5_219: ; %Flow1129
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_221
s_cbranch_execz BB5_221
BB5_220: ; %.thread85.i344
s_mov_b32 m0, -1
ds_read_b32 v21, v18 offset:64
ds_read_b32 v23, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v21, v21, v23
ds_write_b32 v18, v21
ds_read_b32 v19, v19 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v21, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v21
ds_write_b32 v18, v19 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v19, v20 offset:64
ds_read_b32 v20, v18 offset:512
v_mov_b32_e32 v21, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v20
ds_write_b32 v18, v19 offset:512
s_waitcnt lgkmcnt(0)
BB5_221: ; %Flow1130
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v21
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_226
s_cbranch_execz BB5_226
BB5_222:
v_or_b32_e32 v19, 32, v14
v_add_i32_e32 v19, vcc, v19, v2
v_mul_lo_i32 v20, v19, 3
v_mov_b32_e32 v19, 0xe0
v_mad_i32_i24 v19, v19, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v21, v19
ds_read_b32 v19, v19 offset:32
v_add_i32_e32 v20, vcc, v1, v20
v_mov_b32_e32 v23, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v21, v19
v_ashrrev_i32_e32 v21, 31, v20
v_lshl_b64 v[20:21], v[20:21], 2
v_add_i32_e32 v27, vcc, s12, v20
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v21, v23, vcc
buffer_load_dword v21, v[20:21], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_223: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v20, -1
v_add_f32_e32 v20, v19, v21
v_mov_b32_e32 v24, v21
v_mov_b32_e32 v23, v20
buffer_atomic_cmpswap v[23:24], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v20, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v23, v21
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v21, v23
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_223
; BB#224: ; %atomicAdd_g_f.exit.i332
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v20, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v20
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_226
; BB#225:
v_add_f32_e32 v3, v19, v3
BB5_226: ; %Flow1131
s_or_b64 exec, exec, s[10:11]
BB5_227: ; %reduce_force_i_pow2.exit346
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v15
ds_write_b32 v6, v16
ds_write_b32 v7, v17
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_238
s_cbranch_execz BB5_238
BB5_228:
s_mov_b32 m0, -1
ds_read_b32 v15, v18 offset:128
ds_read_b32 v16, v18
v_add_i32_e32 v17, vcc, v0, v26
v_lshlrev_b32_e32 v17, 2, v17
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v15, v16
ds_write_b32 v18, v15
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v15, vcc, s4, v17
ds_read_b32 v16, v15 offset:128
ds_read_b32 v17, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v16, v16, v17
ds_write_b32 v18, v16 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v16, vcc, v0, v22
v_lshlrev_b32_e32 v16, 2, v16
v_add_i32_e32 v16, vcc, s4, v16
ds_read_b32 v17, v16 offset:128
ds_read_b32 v19, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v17, v17, v19
ds_write_b32 v18, v17 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v17, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_230
BB5_229:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v17, 0, -1, vcc
BB5_230: ; %Flow1126
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_232
s_cbranch_execz BB5_232
BB5_231: ; %.thread85.i295
s_mov_b32 m0, -1
ds_read_b32 v17, v18 offset:64
ds_read_b32 v19, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v17, v17, v19
ds_write_b32 v18, v17
ds_read_b32 v15, v15 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v17, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v15, v17
ds_write_b32 v18, v15 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v15, v16 offset:64
ds_read_b32 v16, v18 offset:512
v_mov_b32_e32 v17, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v15, v16
ds_write_b32 v18, v15 offset:512
s_waitcnt lgkmcnt(0)
BB5_232: ; %Flow1127
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v17
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_237
s_cbranch_execz BB5_237
BB5_233:
v_or_b32_e32 v15, 40, v14
v_add_i32_e32 v15, vcc, v15, v2
v_mul_lo_i32 v16, v15, 3
v_mov_b32_e32 v15, 0xe0
v_mad_i32_i24 v15, v15, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v17, v15
ds_read_b32 v15, v15 offset:32
v_add_i32_e32 v16, vcc, v1, v16
v_mov_b32_e32 v19, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v15, v17, v15
v_ashrrev_i32_e32 v17, 31, v16
v_lshl_b64 v[16:17], v[16:17], 2
v_add_i32_e32 v27, vcc, s12, v16
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v17, v19, vcc
buffer_load_dword v17, v[16:17], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_234: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v16, -1
v_add_f32_e32 v16, v15, v17
v_mov_b32_e32 v20, v17
v_mov_b32_e32 v19, v16
buffer_atomic_cmpswap v[19:20], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v16, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v19, v17
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v17, v19
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_234
; BB#235: ; %atomicAdd_g_f.exit.i283
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v16, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v16
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_237
; BB#236:
v_add_f32_e32 v3, v15, v3
BB5_237: ; %Flow1128
s_or_b64 exec, exec, s[10:11]
BB5_238: ; %reduce_force_i_pow2.exit297
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v11
ds_write_b32 v6, v12
ds_write_b32 v7, v13
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB5_249
s_cbranch_execz BB5_249
BB5_239:
s_mov_b32 m0, -1
ds_read_b32 v11, v18 offset:128
ds_read_b32 v12, v18
v_add_i32_e32 v13, vcc, v0, v26
v_lshlrev_b32_e32 v13, 2, v13
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v12
ds_write_b32 v18, v11
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v11, vcc, s4, v13
ds_read_b32 v12, v11 offset:128
ds_read_b32 v13, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v12, v12, v13
ds_write_b32 v18, v12 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v12, vcc, v0, v22
v_lshlrev_b32_e32 v12, 2, v12
v_add_i32_e32 v12, vcc, s4, v12
ds_read_b32 v13, v12 offset:128
ds_read_b32 v15, v18 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v15
ds_write_b32 v18, v13 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v13, 0
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_241
BB5_240:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v13, 0, -1, vcc
BB5_241: ; %Flow1123
s_or_saveexec_b64 s[10:11], s[10:11]
s_xor_b64 exec, exec, s[10:11]
; mask branch BB5_243
s_cbranch_execz BB5_243
BB5_242: ; %.thread85.i246
s_mov_b32 m0, -1
ds_read_b32 v13, v18 offset:64
ds_read_b32 v15, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v15
ds_write_b32 v18, v13
ds_read_b32 v11, v11 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v13, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v13
ds_write_b32 v18, v11 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v11, v12 offset:64
ds_read_b32 v12, v18 offset:512
v_mov_b32_e32 v13, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v12
ds_write_b32 v18, v11 offset:512
s_waitcnt lgkmcnt(0)
BB5_243: ; %Flow1124
s_or_b64 exec, exec, s[10:11]
v_cmp_ne_u32_e32 vcc, 0, v13
s_and_saveexec_b64 s[10:11], vcc
s_xor_b64 s[10:11], exec, s[10:11]
; mask branch BB5_248
s_cbranch_execz BB5_248
BB5_244:
v_or_b32_e32 v11, 48, v14
v_add_i32_e32 v11, vcc, v11, v2
v_mul_lo_i32 v12, v11, 3
v_mov_b32_e32 v11, 0xe0
v_mad_i32_i24 v11, v11, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v13, v11
ds_read_b32 v11, v11 offset:32
v_add_i32_e32 v12, vcc, v1, v12
v_mov_b32_e32 v15, s13
s_mov_b32 s19, 0xf000
s_mov_b32 s18, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v13, v11
v_ashrrev_i32_e32 v13, 31, v12
v_lshl_b64 v[12:13], v[12:13], 2
v_add_i32_e32 v27, vcc, s12, v12
s_mov_b64 s[16:17], s[12:13]
v_addc_u32_e32 v28, vcc, v13, v15, vcc
buffer_load_dword v13, v[12:13], s[16:19], 0 addr64
s_mov_b64 s[16:17], 0
s_mov_b64 s[14:15], s[16:17]
s_waitcnt vmcnt(0)
BB5_245: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v12, -1
v_add_f32_e32 v12, v11, v13
v_mov_b32_e32 v16, v13
v_mov_b32_e32 v15, v12
buffer_atomic_cmpswap v[15:16], v[27:28], s[16:19], 0 addr64 glc
v_mov_b32_e32 v12, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v15, v13
s_or_b64 s[14:15], vcc, s[14:15]
v_mov_b32_e32 v13, v15
s_andn2_b64 exec, exec, s[14:15]
s_cbranch_execnz BB5_245
; BB#246: ; %atomicAdd_g_f.exit.i234
s_or_b64 exec, exec, s[14:15]
s_and_b64 s[14:15], exec, s[2:3]
v_cndmask_b32_e64 v12, 0, 1, s[14:15]
v_cmp_ne_u32_e32 vcc, 1, v12
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_248
; BB#247:
v_add_f32_e32 v3, v11, v3
BB5_248: ; %Flow1125
s_or_b64 exec, exec, s[10:11]
BB5_249: ; %reduce_force_i_pow2.exit248
s_or_b64 exec, exec, s[6:7]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v5, v8
ds_write_b32 v6, v9
ds_write_b32 v7, v10
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[6:7], s[0:1]
s_xor_b64 s[0:1], exec, s[6:7]
; mask branch BB5_260
s_cbranch_execz BB5_260
BB5_250:
s_mov_b32 m0, -1
ds_read_b32 v5, v18 offset:128
ds_read_b32 v6, v18
v_add_i32_e32 v7, vcc, v0, v26
v_lshlrev_b32_e32 v7, 2, v7
v_add_i32_e32 v0, vcc, v0, v22
v_lshlrev_b32_e32 v0, 2, v0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v5, v5, v6
ds_write_b32 v18, v5
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v5, vcc, s4, v7
ds_read_b32 v6, v5 offset:128
ds_read_b32 v7, v18 offset:256
v_add_i32_e32 v0, vcc, s4, v0
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v6, v6, v7
ds_write_b32 v18, v6 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v6, v0 offset:128
ds_read_b32 v7, v18 offset:512
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v6, v6, v7
ds_write_b32 v18, v6 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v6, 0
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB5_252
BB5_251:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v6, 0, -1, vcc
BB5_252: ; %Flow1120
s_or_saveexec_b64 s[4:5], s[4:5]
s_xor_b64 exec, exec, s[4:5]
; mask branch BB5_254
s_cbranch_execz BB5_254
BB5_253: ; %.thread85.i197
s_mov_b32 m0, -1
ds_read_b32 v6, v18 offset:64
ds_read_b32 v7, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v6, v6, v7
ds_write_b32 v18, v6
ds_read_b32 v5, v5 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v6, v18 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v5, v5, v6
ds_write_b32 v18, v5 offset:256
ds_read_b32 v0, v0 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v5, v18 offset:512
v_mov_b32_e32 v6, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v0, v0, v5
ds_write_b32 v18, v0 offset:512
s_waitcnt lgkmcnt(0)
BB5_254: ; %Flow1121
s_or_b64 exec, exec, s[4:5]
v_cmp_ne_u32_e32 vcc, 0, v6
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB5_259
s_cbranch_execz BB5_259
BB5_255:
v_or_b32_e32 v0, 56, v14
v_add_i32_e32 v0, vcc, v0, v2
v_mul_lo_i32 v2, v0, 3
v_mov_b32_e32 v0, 0xe0
v_mad_i32_i24 v0, v0, v1, v18
s_mov_b32 m0, -1
ds_read_b32 v5, v0
ds_read_b32 v0, v0 offset:32
s_mov_b32 s15, 0xf000
s_mov_b32 s14, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v0, v5, v0
v_add_i32_e32 v5, vcc, v1, v2
v_ashrrev_i32_e32 v6, 31, v5
v_lshl_b64 v[7:8], v[5:6], 2
v_add_i32_e32 v5, vcc, s12, v7
v_mov_b32_e32 v2, s13
v_addc_u32_e32 v6, vcc, v8, v2, vcc
buffer_load_dword v8, v[7:8], s[12:15], 0 addr64
s_mov_b64 s[12:13], 0
s_mov_b64 s[6:7], s[12:13]
s_waitcnt vmcnt(0)
BB5_256: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v7, v0, v8
v_mov_b32_e32 v10, v8
v_mov_b32_e32 v9, v7
buffer_atomic_cmpswap v[9:10], v[5:6], s[12:15], 0 addr64 glc
v_mov_b32_e32 v2, -1
v_mov_b32_e32 v2, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v9, v8
s_or_b64 s[6:7], vcc, s[6:7]
v_mov_b32_e32 v8, v9
s_andn2_b64 exec, exec, s[6:7]
s_cbranch_execnz BB5_256
; BB#257: ; %atomicAdd_g_f.exit.i185
s_or_b64 exec, exec, s[6:7]
s_and_b64 s[6:7], exec, s[2:3]
v_cndmask_b32_e64 v2, 0, 1, s[6:7]
v_cmp_ne_u32_e32 vcc, 1, v2
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB5_259
; BB#258:
v_add_f32_e32 v3, v0, v3
BB5_259: ; %Flow1122
s_or_b64 exec, exec, s[4:5]
BB5_260: ; %reduce_force_i_pow2.exit199
s_or_b64 exec, exec, s[0:1]
s_barrier
v_cmp_gt_u32_e32 vcc, 3, v1
s_and_b64 s[0:1], exec, s[2:3]
s_and_b64 s[0:1], vcc, s[0:1]
s_and_saveexec_b64 s[2:3], s[0:1]
s_xor_b64 s[0:1], exec, s[2:3]
; mask branch BB5_264
s_cbranch_execz BB5_264
BB5_261:
v_add_i32_e32 v0, vcc, v4, v1
v_mov_b32_e32 v1, 0
v_lshl_b64 v[0:1], v[0:1], 2
v_add_i32_e32 v4, vcc, s8, v0
v_mov_b32_e32 v2, s9
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
v_addc_u32_e32 v5, vcc, v1, v2, vcc
buffer_load_dword v1, v[0:1], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[2:3], s[8:9]
s_waitcnt vmcnt(0)
BB5_262: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v0, -1
v_add_f32_e32 v0, v3, v1
v_mov_b32_e32 v7, v1
v_mov_b32_e32 v6, v0
buffer_atomic_cmpswap v[6:7], v[4:5], s[8:11], 0 addr64 glc
v_mov_b32_e32 v0, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v6, v1
s_or_b64 s[2:3], vcc, s[2:3]
v_mov_b32_e32 v1, v6
s_andn2_b64 exec, exec, s[2:3]
s_cbranch_execnz BB5_262
; BB#263: ; %Flow
s_or_b64 exec, exec, s[2:3]
BB5_264: ; %Flow1119
s_or_b64 exec, exec, s[0:1]
s_endpgm
.Lfunc_end5:
.size nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl, .Lfunc_end5-nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_opencl
.section .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 21256
; NumSgprs: 42
; NumVgprs: 84
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 5
; VGPRBlocks: 20
; NumSGPRsForWavesPerEU: 42
; NumVGPRsForWavesPerEU: 84
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 1
.section .AMDGPU.config
; Five (register offset, value) pairs, written in hex so the packed bit
; fields can be read directly. Register names follow the LLVM AMDGPU
; backend's definitions; the decoded fields agree with the
; amd_kernel_code_t header of the kernel emitted right after this table
; (25 granulated VGPRs, 7 granulated SGPRs, float_mode 192, 8 user SGPRs).
;
; 0xB848 (COMPUTE_PGM_RSRC1) = 0x00AC01D9
;   [5:0] VGPRS = 25, [9:6] SGPRS = 7, [19:12] FLOAT_MODE = 0xC0 (192),
;   bit 21 DX10_CLAMP = 1, bit 23 IEEE_MODE = 1
.long 0x0000B848
.long 0x00AC01D9
; 0xB84C (COMPUTE_PGM_RSRC2) = 0x00000890
;   bit 0 SCRATCH_EN = 0, [5:1] USER_SGPR = 8, bit 7 TGID_X_EN = 1,
;   [12:11] TIDIG_COMP_CNT = 1
.long 0x0000B84C
.long 0x00000890
; 0xB860 (COMPUTE_TMPRING_SIZE) = 0
.long 0x0000B860
.long 0x00000000
; Keys 0x4 and 0x8 are presumably LLVM's pseudo-registers for the spilled
; SGPR and spilled VGPR counts (both 0 here) - TODO confirm against the
; backend version that produced this dump.
.long 0x00000004
.long 0x00000000
.long 0x00000008
.long 0x00000000
.text
.globl nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl
.p2align 8
.type nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl,@function
.amdgpu_hsa_kernel nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl
nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl: ; @nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl
; Kernel descriptor (amd_kernel_code_t header) for
; nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl. Every line is a
; "field = value" assignment; the instruction stream starts after
; .end_amd_kernel_code_t.
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
; Target ISA version 7.0.1 (major.minor.stepping).
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
; 256 equals 2^8, the alignment requested by the ".p2align 8" in front of
; the kernel label; presumably this is the distance from the start of the
; header to the first instruction - TODO confirm.
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
; Register budget in allocation granules. The numbers are consistent with
; granules of 4 VGPRs and 8 SGPRs: (25 + 1) * 4 = 104 matches
; workitem_vgpr_count below, and (7 + 1) * 8 = 64 covers
; wavefront_sgpr_count = 58.
granulated_workitem_vgpr_count = 25
granulated_wavefront_sgpr_count = 7
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
; Eight user SGPRs are preloaded. NOTE(review): the kernel body reads
; through s[4:5] and s[6:7] and takes a value from s8, which looks like
; private segment buffer = s[0:3], dispatch ptr = s[4:5],
; kernarg ptr = s[6:7], workgroup id x = s8 - confirm against the ABI.
user_sgpr_count = 8
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
enable_vgpr_workitem_id = 1
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
; Only these three pointer-style inputs are enabled: private segment
; buffer, dispatch pointer and kernarg segment pointer.
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
; No per-work-item private (scratch) memory is requested.
workitem_private_segment_byte_size = 0
; No statically sized group (LDS) segment is declared. NOTE(review): the
; kernel body still issues ds_read/ds_write with addresses derived from a
; kernarg value (s14), so group memory is presumably provided dynamically
; at launch - TODO confirm.
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 232
workgroup_fbarrier_count = 0
; Register counts in units of single registers.
wavefront_sgpr_count = 58
workitem_vgpr_count = 104
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
; NOTE(review): the three alignment fields and wavefront_size look like
; power-of-two exponents (2^4 = 16 bytes, 2^6 = 64 lanes) - confirm
; against the amd_kernel_code_t specification.
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
s_load_dwordx2 s[20:21], s[6:7], 0x2c
s_mov_b32 s9, 0
s_lshl_b64 s[0:1], s[8:9], 4
v_mov_b32_e32 v4, s1
v_mov_b32_e32 v3, s0
s_mov_b32 s23, 0xf000
s_mov_b32 s22, s9
s_waitcnt lgkmcnt(0)
buffer_load_dwordx4 v[36:39], v[3:4], s[20:23], 0 addr64
v_mov_b32_e32 v2, v0
s_load_dwordx2 s[0:1], s[6:7], 0x24
s_load_dwordx2 s[32:33], s[6:7], 0x18
s_mov_b64 s[34:35], s[22:23]
s_mov_b64 s[2:3], s[22:23]
s_load_dword s14, s[6:7], 0x33
s_load_dwordx2 s[36:37], s[6:7], 0x22
s_mov_b32 m0, -1
s_mov_b64 s[38:39], s[22:23]
s_load_dword s18, s[6:7], 0x5
s_waitcnt vmcnt(0)
v_lshlrev_b32_e32 v41, 3, v36
v_mul_lo_i32 v4, v37, 3
v_add_i32_e32 v0, vcc, v1, v41
v_lshlrev_b32_e32 v0, 3, v0
v_add_i32_e32 v9, vcc, v2, v0
v_ashrrev_i32_e32 v10, 31, v9
v_ashrrev_i32_e32 v5, 31, v4
v_lshl_b64 v[11:12], v[4:5], 2
v_lshl_b64 v[6:7], v[9:10], 4
s_waitcnt lgkmcnt(0)
buffer_load_dwordx4 v[5:8], v[6:7], s[32:35], 0 addr64
buffer_load_dwordx2 v[13:14], v[11:12], s[0:3], 0 addr64
buffer_load_dword v0, v[11:12], s[0:3], 0 addr64 offset:8
s_load_dword s2, s[6:7], 0x2
v_lshlrev_b32_e32 v11, 3, v1
v_add_i32_e32 v40, vcc, v2, v11
s_load_dword s0, s[4:5], 0x1
s_add_i32 s4, s14, 0x420
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
s_waitcnt vmcnt(1)
v_add_f32_e32 v15, v6, v14
v_add_f32_e32 v14, v5, v13
s_waitcnt vmcnt(0)
v_add_f32_e32 v5, v7, v0
v_lshlrev_b32_e32 v0, 4, v40
v_add_i32_e32 v3, vcc, s14, v0
v_mul_f32_e32 v6, s2, v8
ds_write2_b64 v3, v[14:15], v[5:6] offset1:1
s_waitcnt lgkmcnt(0)
v_lshl_b64 v[5:6], v[9:10], 3
buffer_load_dwordx2 v[5:6], v[5:6], s[36:39], 0 addr64
v_mad_u32_u24 v0, s0, v1, v2
v_lshlrev_b32_e32 v7, 3, v40
v_add_i32_e32 v7, vcc, s4, v7
v_or_b32_e32 v3, 32, v0
v_lshrrev_b32_e32 v46, 5, v0
v_cmp_eq_u32_e32 vcc, 32, v3
s_waitcnt vmcnt(0)
ds_write_b64 v7, v[5:6]
s_and_saveexec_b64 s[0:1], vcc
s_xor_b64 s[0:1], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; mask branch BB6_2
BB6_1:
v_lshlrev_b32_e32 v3, 2, v46
v_add_i32_e32 v3, vcc, s14, v3
v_mov_b32_e32 v5, 0
s_mov_b32 m0, -1
ds_write_b32 v3, v5 offset:2336
s_waitcnt lgkmcnt(0)
BB6_2:
s_or_b64 exec, exec, s[0:1]
s_barrier
s_load_dwordx2 s[40:41], s[6:7], 0x2e
v_cmp_ne_u32_e32 vcc, 22, v37
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v3, 0
v_cmp_eq_u32_e64 s[0:1], 22, v37
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB6_5
; BB#3:
v_ashrrev_i32_e32 v6, 31, v38
v_mov_b32_e32 v5, v38
s_mov_b32 s43, 0xf000
s_mov_b32 s42, 0
v_lshl_b64 v[5:6], v[5:6], 5
buffer_load_dword v5, v[5:6], s[40:43], 0 addr64
s_waitcnt vmcnt(0)
v_cmp_ne_u32_e32 vcc, v5, v41
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v5, v3
s_cbranch_vccnz BB6_6
; BB#4: ; %.preheader549.preheader
v_lshlrev_b32_e32 v5, 4, v2
v_add_i32_e32 v9, vcc, s14, v5
s_mov_b32 m0, -1
ds_read2_b64 v[5:8], v9 offset0:1 offset1:17
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v5, 0x41000000
ds_read2_b64 v[12:15], v9 offset0:33 offset1:49
v_mul_f32_e32 v5, s2, v5
ds_read2_b64 v[16:19], v9 offset0:65 offset1:81
v_mul_f32_e32 v10, v6, v6
v_mov_b32_e32 v6, 0x6f800000
v_cmp_lt_f32_e64 vcc, v6, |v5|
v_mov_b32_e32 v6, 0x2f800000
s_waitcnt lgkmcnt(0)
v_cndmask_b32_e32 v12, 1.0, v6, vcc
v_mac_f32_e32 v10, v8, v8
v_mul_f32_e32 v5, v12, v5
v_mac_f32_e32 v10, v13, v13
v_rcp_f32_e32 v13, v5
ds_read2_b64 v[5:8], v9 offset0:97 offset1:113
v_mac_f32_e32 v10, v15, v15
v_mac_f32_e32 v10, v17, v17
v_mac_f32_e32 v10, v19, v19
s_waitcnt lgkmcnt(0)
v_mac_f32_e32 v10, v6, v6
v_mac_f32_e32 v10, v8, v8
v_mul_f32_e32 v5, v13, v10
v_mov_b32_e32 v6, 0xbf106ebb
v_mul_f32_e32 v5, v5, v12
v_mul_f32_e32 v6, s18, v6
v_mul_f32_e32 v5, v5, v6
s_branch BB6_6
BB6_5:
v_mov_b32_e32 v5, v3
BB6_6: ; %.preheader548
s_load_dwordx2 s[28:29], s[6:7], 0x1a
v_cmp_lt_i32_e32 vcc, v38, v39
v_mov_b32_e32 v17, -1
s_and_b64 vcc, exec, vcc
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB6_8
; BB#7: ; %.preheader548.._crit_edge_crit_edge
v_mov_b32_e32 v8, 0
v_lshlrev_b32_e32 v6, 2, v0
v_mov_b32_e32 v9, v8
v_mov_b32_e32 v10, v8
v_add_i32_e32 v7, vcc, s14, v6
v_mov_b32_e32 v51, v11
v_mov_b32_e32 v16, v11
v_add_i32_e32 v6, vcc, 0x620, v7
v_add_i32_e32 v12, vcc, 0x820, v7
v_add_i32_e32 v7, vcc, 0x720, v7
v_mov_b32_e32 v17, 0
v_mov_b32_e32 v50, v10
v_mov_b32_e32 v49, v9
v_mov_b32_e32 v48, v8
v_mov_b32_e32 v15, v10
v_mov_b32_e32 v14, v9
v_mov_b32_e32 v13, v8
s_branch BB6_9
BB6_8:
; implicit-def: %VGPR8
; implicit-def: %VGPR48_VGPR49_VGPR50_VGPR51
; implicit-def: %VGPR6
; implicit-def: %VGPR12
; implicit-def: %VGPR13_VGPR14_VGPR15_VGPR16
; implicit-def: %VGPR7
BB6_9: ; %Flow1256
s_load_dwordx2 s[24:25], s[6:7], 0x20
s_load_dwordx2 s[20:21], s[6:7], 0x1c
s_load_dwordx2 s[16:17], s[6:7], 0x1e
v_cmp_ne_u32_e32 vcc, 0, v17
v_cndmask_b32_e64 v9, 0, 1, vcc
v_mov_b32_e32 v42, v48
v_mov_b32_e32 v32, v48
v_mov_b32_e32 v28, v48
v_mov_b32_e32 v24, v48
v_mov_b32_e32 v20, v48
v_mov_b32_e32 v16, v48
v_cmp_ne_u32_e32 vcc, 1, v9
s_movk_i32 s5, 0x620
s_add_i32 s15, s14, s5
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v43, v49
v_mov_b32_e32 v44, v50
v_mov_b32_e32 v45, v51
v_mov_b32_e32 v33, v49
v_mov_b32_e32 v34, v50
v_mov_b32_e32 v35, v51
v_mov_b32_e32 v29, v49
v_mov_b32_e32 v30, v50
v_mov_b32_e32 v31, v51
v_mov_b32_e32 v25, v49
v_mov_b32_e32 v26, v50
v_mov_b32_e32 v27, v51
v_mov_b32_e32 v21, v49
v_mov_b32_e32 v22, v50
v_mov_b32_e32 v23, v51
v_mov_b32_e32 v17, v49
v_mov_b32_e32 v18, v50
v_mov_b32_e32 v19, v51
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB6_176
; BB#10: ; %.lr.ph
v_or_b32_e32 v6, 4, v1
v_mov_b32_e32 v13, 0
v_cmp_eq_u32_e32 vcc, 4, v6
v_cmp_gt_u32_e64 s[2:3], 4, v2
s_and_b64 s[48:49], s[2:3], vcc
v_add_i32_e32 v6, vcc, v1, v2
v_and_b32_e32 v8, 4, v1
s_load_dword s19, s[6:7], 0x6
s_load_dword s22, s[6:7], 0x9
s_load_dword s23, s[6:7], 0xf
s_load_dword s26, s[6:7], 0x12
s_load_dwordx2 s[42:43], s[6:7], 0x30
v_mov_b32_e32 v14, v13
v_mov_b32_e32 v15, v13
v_mov_b32_e32 v19, v16
s_add_i32 s8, s14, 0x400
v_lshlrev_b32_e32 v6, 2, v6
v_lshlrev_b32_e32 v8, 2, v8
v_add_i32_e32 v10, vcc, s8, v6
v_lshlrev_b32_e32 v6, 2, v0
v_add_i32_e32 v54, vcc, s8, v8
v_lshlrev_b32_e32 v8, 4, v2
v_mov_b32_e32 v18, v15
v_mov_b32_e32 v17, v14
v_mov_b32_e32 v16, v13
v_add_i32_e32 v12, vcc, s14, v6
v_mov_b32_e32 v23, v16
v_mov_b32_e32 v27, v16
v_mov_b32_e32 v31, v16
v_mov_b32_e32 v35, v16
v_mov_b32_e32 v45, v16
v_mov_b32_e32 v51, v16
v_mul_f32_e64 v9, s18, s18
v_mov_b32_e32 v47, 0
v_add_i32_e32 v55, vcc, s14, v8
v_lshlrev_b32_e32 v8, 3, v2
v_add_i32_e32 v56, vcc, s4, v8
s_mov_b32 s46, 0
v_and_b32_e32 v52, 31, v0
v_mov_b32_e32 v53, v47
v_cmp_gt_u32_e64 s[2:3], v1, v2
v_mul_f32_e32 v37, s18, v9
v_add_i32_e32 v6, vcc, s5, v12
v_add_i32_e32 v7, vcc, 0x720, v12
v_add_i32_e32 v12, vcc, 0x820, v12
s_mov_b32 s47, 0xf000
s_mov_b64 s[44:45], 0
s_brev_b32 s27, -2
s_mov_b32 s50, 0x7ffff000
s_brev_b32 s51, 1
v_ashrrev_i32_e32 v58, 31, v38
v_mov_b32_e32 v57, v38
v_or_b32_e32 v38, 7, v41
v_or_b32_e32 v59, 6, v41
v_or_b32_e32 v60, 5, v41
v_or_b32_e32 v61, 4, v41
v_or_b32_e32 v62, 3, v41
v_or_b32_e32 v63, 2, v41
v_or_b32_e32 v64, 1, v41
v_mov_b32_e32 v22, v15
v_mov_b32_e32 v21, v14
v_mov_b32_e32 v20, v13
v_mov_b32_e32 v26, v15
v_mov_b32_e32 v25, v14
v_mov_b32_e32 v24, v13
v_mov_b32_e32 v30, v15
v_mov_b32_e32 v29, v14
v_mov_b32_e32 v28, v13
v_mov_b32_e32 v34, v15
v_mov_b32_e32 v33, v14
v_mov_b32_e32 v32, v13
v_mov_b32_e32 v44, v15
v_mov_b32_e32 v43, v14
v_mov_b32_e32 v42, v13
v_mov_b32_e32 v50, v15
v_mov_b32_e32 v49, v14
v_mov_b32_e32 v48, v13
v_mov_b32_e32 v8, v13
s_waitcnt lgkmcnt(0)
BB6_11: ; =>This Loop Header: Depth=1
; Child Loop BB6_51 Depth 2
; Child Loop BB6_91 Depth 2
; Child Loop BB6_131 Depth 2
; Child Loop BB6_171 Depth 2
v_lshl_b64 v[65:66], v[57:58], 5
v_add_i32_e32 v67, vcc, s40, v65
v_mov_b32_e32 v19, s41
v_addc_u32_e32 v66, vcc, v66, v19, vcc
v_lshl_b64 v[68:69], v[46:47], 3
v_add_i32_e32 v67, vcc, v67, v68
v_addc_u32_e32 v68, vcc, v66, v69, vcc
buffer_load_dwordx2 v[65:66], v[67:68], s[44:47], 0 addr64 offset:16
s_waitcnt vmcnt(0)
v_cmp_ne_u32_e32 vcc, 0, v65
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[52:53], exec, s[4:5]
; mask branch BB6_175
s_cbranch_execz BB6_175
BB6_12: ; in Loop: Header=BB6_11 Depth=1
v_ashrrev_i32_e32 v68, 31, v66
v_mov_b32_e32 v67, v66
v_lshl_b64 v[66:67], v[67:68], 7
v_add_i32_e32 v68, vcc, s42, v66
v_mov_b32_e32 v19, s43
v_addc_u32_e32 v67, vcc, v67, v19, vcc
v_lshl_b64 v[69:70], v[52:53], 2
v_add_i32_e32 v68, vcc, v68, v69
v_addc_u32_e32 v69, vcc, v67, v70, vcc
buffer_load_dword v19, v[68:69], s[44:47], 0 addr64
s_and_saveexec_b64 s[4:5], s[48:49]
s_xor_b64 s[4:5], exec, s[4:5]
s_waitcnt vmcnt(0)
; mask branch BB6_14
s_cbranch_execz BB6_14
BB6_13: ; in Loop: Header=BB6_11 Depth=1
v_lshl_b64 v[66:67], v[57:58], 5
v_add_i32_e32 v68, vcc, s40, v66
v_mov_b32_e32 v23, s41
v_addc_u32_e32 v67, vcc, v67, v23, vcc
v_lshl_b64 v[69:70], v[2:3], 2
v_add_i32_e32 v68, vcc, v68, v69
v_addc_u32_e32 v69, vcc, v67, v70, vcc
buffer_load_dword v23, v[68:69], s[44:47], 0 addr64
s_mov_b32 m0, -1
s_waitcnt vmcnt(0)
ds_write_b32 v10, v23
s_waitcnt lgkmcnt(0)
BB6_14: ; %.preheader.preheader
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[4:5]
v_and_b32_e32 v23, 0xff, v65
v_cmp_ne_u32_e32 vcc, 0, v23
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[54:55], exec, s[4:5]
; mask branch BB6_54
s_cbranch_execz BB6_54
BB6_15: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
ds_read_b32 v23, v54
s_mov_b64 s[34:35], s[46:47]
s_mov_b64 s[38:39], s[46:47]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v27, 3, v23
v_add_i32_e32 v66, vcc, v27, v1
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 4
v_lshl_b64 v[70:71], v[66:67], 3
buffer_load_dwordx4 v[75:78], v[68:69], s[32:35], 0 addr64
buffer_load_dwordx2 v[67:68], v[70:71], s[36:39], 0 addr64
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v65
v_cmp_eq_u32_e32 vcc, 1, v27
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v74, v69
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
s_waitcnt vmcnt(0)
; mask branch BB6_19
s_cbranch_execz BB6_19
BB6_16: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset1:1
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v27, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v31, v75, v79
v_mul_f32_e32 v45, v27, v27
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v31, v31
v_mov_b32_e32 v69, 0
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v74, v69
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_18
s_cbranch_execz BB6_18
BB6_17: ; in Loop: Header=BB6_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v69, v9, v45
v_mul_f32_e32 v70, v69, v69
v_mov_b32_e32 v71, 0x3a92b707
v_madak_f32_e32 v71, v71, v70, 0x3ded3cb2
v_mov_b32_e32 v72, 0x3c739487
v_madak_f32_e32 v72, v72, v70, 0x3f01e2bc
v_mad_f32 v71, v71, v70, 1.0
v_mac_f32_e32 v71, v69, v72
v_mov_b32_e32 v72, 0xb2951928
v_madak_f32_e32 v72, v72, v70, 0xb85ffb93
v_mov_b32_e32 v73, 0x35c55945
v_madak_f32_e32 v73, v73, v70, 0x3a83ca0c
v_madak_f32_e32 v72, v72, v70, 0xbc9ded90
v_madak_f32_e32 v73, v73, v70, 0x3d8eaf3b
v_madak_f32_e32 v72, v72, v70, 0xbf409397
v_mac_f32_e32 v72, v69, v73
v_rsq_f32_e32 v73, v45
v_and_b32_e32 v69, 1, v19
v_cmp_eq_u32_e32 vcc, 1, v69
s_mov_b32 m0, -1
v_mul_f32_e32 v79, v73, v73
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v69, v79, v79
v_mul_f32_e32 v80, v74, v69
ds_read_b64 v[69:70], v56
v_mul_f32_e32 v81, v79, v80
v_mad_f32 v80, v80, v79, s23
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v83, v81, v81, s26
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v70, v68, v70
v_mul_f32_e32 v82, v81, v70
v_mad_f32 v82, v69, v67, -v82
v_mul_f32_e32 v69, v67, v69
v_mul_f32_e32 v80, 0xbe2aaaab, v80
v_mul_f32_e32 v70, v83, v70
v_mul_f32_e32 v69, v69, v80
v_mac_f32_e32 v69, 0x3daaaaaa, v70
v_mac_f32_e32 v8, v74, v69
v_rcp_f32_e32 v69, v71
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v70, v74, v79
v_mul_f32_e32 v45, v73, v45
v_mul_f32_e32 v69, v37, v69
v_mul_f32_e32 v69, v72, v69
v_mac_f32_e32 v69, v73, v70
v_and_b32_e32 v70, s27, v45
v_mov_b32_e32 v71, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v71, v70
v_mul_f32_e32 v71, v70, v70
v_rcp_f32_e32 v72, v71
v_add_f32_e32 v80, -1.0, v70
v_mov_b32_e32 v83, 0xbd777f97
v_mov_b32_e32 v84, 0xbf100000
v_cndmask_b32_e64 v72, v72, v80, s[4:5]
v_mov_b32_e32 v80, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v80, v70
v_cndmask_b32_e64 v71, v72, v71, s[8:9]
v_mov_b32_e32 v80, 0xc11d077e
v_mov_b32_e32 v72, 0x4036db6e
v_madak_f32_e32 v80, v80, v71, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v72, v70
v_mov_b32_e32 v72, 0xc3f1c275
v_madak_f32_e32 v72, v72, v71, 0xc480230b
v_madak_f32_e32 v80, v71, v80, 0xc3389ae7
v_madak_f32_e32 v72, v71, v72, 0xc41f6441
v_madak_f32_e32 v80, v71, v80, 0xc322658c
v_madak_f32_e32 v72, v71, v72, 0xc320a2ea
v_madak_f32_e32 v80, v71, v80, 0xc2798057
v_madak_f32_e32 v72, v71, v72, 0xc18e104b
v_madak_f32_e32 v80, v71, v80, 0xc128f022
v_madak_f32_e32 v72, v71, v72, 0xbf4c9dd4
v_madak_f32_e32 v80, v71, v80, 0xbf31a0b7
v_madak_f32_e32 v72, v71, v72, 0xbc21a092
v_madak_f32_e32 v80, v71, v80, 0xbc21a093
v_madak_f32_e32 v83, v83, v71, 0x40d23f7c
v_cndmask_b32_e32 v72, v72, v80, vcc
v_mov_b32_e32 v80, 0xc1b38712
v_madak_f32_e32 v80, v80, v71, 0x43ed43a7
v_madak_f32_e32 v83, v71, v83, 0x42d9451f
v_madak_f32_e32 v80, v71, v80, 0x451f90ce
v_madak_f32_e32 v83, v71, v83, 0x43d6810b
v_madak_f32_e32 v80, v71, v80, 0x4547fdbb
v_madak_f32_e32 v83, v71, v83, 0x442158c9
v_madak_f32_e32 v80, v71, v80, 0x44c01759
v_madak_f32_e32 v83, v71, v83, 0x43d9486f
v_madak_f32_e32 v80, v71, v80, 0x43a2e571
v_madak_f32_e32 v83, v71, v83, 0x4309a863
v_madak_f32_e32 v80, v71, v80, 0x41f2b459
v_madak_f32_e32 v83, v71, v83, 0x419d35ce
v_cndmask_b32_e32 v80, v80, v83, vcc
v_mov_b32_e32 v83, 0xbb0df9c0
v_madak_f32_e32 v83, v83, v71, 0x3d1151b3
v_madak_f32_e32 v83, v71, v83, 0xbde31cc2
v_madak_f32_e32 v83, v71, v83, 0x3ea2fe54
v_madak_f32_e32 v83, v71, v83, 0xbebe9208
v_madak_f32_e32 v83, v71, v83, 0x3ed46805
v_madak_f32_e32 v83, v71, v83, 0xbb1acdc6
v_cndmask_b32_e64 v72, v72, v83, s[4:5]
v_mov_b32_e32 v83, 0x3c445aa3
v_madak_f32_e32 v83, v83, v71, 0x3c5f6e13
v_madak_f32_e32 v83, v71, v83, 0x3e013307
v_madak_f32_e32 v83, v71, v83, 0x3d931ae7
v_madak_f32_e32 v83, v71, v83, 0x3f0a5785
v_madak_f32_e32 v83, v71, v83, 0x3dd9f331
v_cndmask_b32_e64 v80, v80, v83, s[4:5]
v_mov_b32_e32 v83, 0xb684e21a
v_madak_f32_e32 v83, v83, v71, 0x390aee49
v_madak_f32_e32 v83, v71, v83, 0x3ba68116
v_madak_f32_e32 v83, v71, v83, 0x3d852a63
v_madak_f32_e32 v83, v71, v83, 0x3ecbbbce
v_cndmask_b32_e64 v80, v80, v83, s[8:9]
v_mov_b32_e32 v83, 0xb7c756b1
v_madak_f32_e32 v83, v83, v71, 0xbbbd1489
v_madak_f32_e32 v83, v71, v83, 0xbce9528f
v_madak_f32_e32 v83, v71, v83, 0xbea66beb
v_madak_f32_e32 v83, v71, v83, 0x3e0375d4
v_mad_f32 v71, v71, v80, 1.0
v_and_b32_e32 v80, s50, v45
v_mad_f32 v84, v80, -v80, v84
v_cmp_gt_f32_e32 vcc, 0, v84
v_cndmask_b32_e64 v85, 0.5, -0.5, vcc
v_mov_b32_e32 v86, 0x3fb8aa3b
v_mac_f32_e32 v85, v86, v84
v_cvt_i32_f32_e32 v85, v85
v_mov_b32_e32 v91, 0xbf317180
v_mov_b32_e32 v93, 0xb717f7d1
v_mov_b32_e32 v96, 0xb5ddea0e
v_cvt_f32_i32_e32 v90, v85
v_mov_b32_e32 v97, 0x3331bb4c
v_mov_b32_e32 v99, 0x388ab355
v_mov_b32_e32 v100, 0xbb360b61
v_mad_f32 v92, v91, v90, v84
v_mad_f32 v94, v93, v90, v92
v_mul_f32_e32 v95, v94, v94
v_mad_f32 v98, v97, v95, v96
v_mad_f32 v98, v98, v95, v99
v_mad_f32 v98, v98, v95, v100
v_mov_b32_e32 v101, 0x3e2aaaab
v_mad_f32 v98, v98, v95, v101
v_mad_f32 v95, -v95, v98, v94
v_mov_b32_e32 v87, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v71|, v87
v_mov_b32_e32 v88, 0x2f800000
v_sub_f32_e32 v98, 2.0, v95
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e64 vcc, |v98|, v87
v_mul_f32_e32 v71, v89, v71
v_cndmask_b32_e32 v102, 1.0, v88, vcc
v_mul_f32_e32 v98, v102, v98
v_rcp_f32_e32 v71, v71
v_rcp_f32_e32 v98, v98
v_cndmask_b32_e64 v72, v72, v83, s[8:9]
v_lshlrev_b32_e32 v83, 23, v85
v_mul_f32_e32 v71, v71, v72
v_mul_f32_e32 v72, v95, v94
v_mul_f32_e32 v72, v98, v72
v_mul_f32_e32 v72, v72, v102
v_mad_f32 v72, -v90, v93, -v72
v_subrev_f32_e32 v72, v92, v72
v_sub_f32_e32 v72, 1.0, v72
v_add_i32_e32 v72, vcc, v72, v83
v_subrev_f32_e32 v83, v70, v80
v_mul_f32_e32 v85, v71, v89
v_add_f32_e32 v80, v70, v80
v_mad_f32 v80, v80, v83, v85
v_cmp_gt_f32_e32 vcc, 0, v80
v_cndmask_b32_e64 v83, 0.5, -0.5, vcc
v_mac_f32_e32 v83, v86, v80
v_cvt_i32_f32_e32 v83, v83
v_madak_f32_e32 v71, v89, v71, 0x3f58560b
v_cvt_f32_i32_e32 v86, v83
v_lshlrev_b32_e32 v83, 23, v83
v_mad_f32 v90, v91, v86, v80
v_mad_f32 v91, v93, v86, v90
v_mul_f32_e32 v92, v91, v91
v_mac_f32_e32 v96, v97, v92
v_mac_f32_e32 v99, v96, v92
v_mac_f32_e32 v100, v99, v92
v_mac_f32_e32 v101, v100, v92
v_mad_f32 v92, -v92, v101, v91
v_mul_f32_e32 v91, v92, v91
v_sub_f32_e32 v92, 2.0, v92
v_cmp_gt_f32_e64 vcc, |v92|, v87
v_cndmask_b32_e32 v94, 1.0, v88, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_mul_f32_e32 v91, v92, v91
v_mul_f32_e32 v91, v91, v94
v_mad_f32 v86, -v86, v93, -v91
v_mov_b32_e32 v91, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v84, v91
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v72, 0, v72, vcc
v_cmp_lt_f32_e32 vcc, v84, v92
v_mov_b32_e32 v93, 0x7f800000
v_cndmask_b32_e32 v72, v93, v72, vcc
v_cmp_u_f32_e32 vcc, v84, v84
v_cndmask_b32_e32 v72, v72, v84, vcc
v_subrev_f32_e32 v84, v90, v86
v_sub_f32_e32 v84, 1.0, v84
v_add_i32_e32 v83, vcc, v84, v83
v_cmp_ge_f32_e32 vcc, v80, v91
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e64 s[10:11], v80, v92
v_cndmask_b32_e64 v83, v93, v83, s[10:11]
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v83, v80, vcc
v_mul_f32_e32 v72, v80, v72
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v80, v70
v_mov_b32_e32 v80, 0x31800000
v_cmp_gt_f32_e64 vcc, |v70|, v87
v_cmp_gt_f32_e64 s[12:13], v80, v70
v_cndmask_b32_e32 v80, 1.0, v88, vcc
v_mul_f32_e32 v70, v80, v70
v_rcp_f32_e32 v70, v70
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v70, v70, v72
v_mad_f32 v70, -v80, v70, 1.0
v_cndmask_b32_e64 v70, 1.0, v70, s[10:11]
v_cndmask_b32_e64 v70, v70, v71, s[4:5]
v_and_b32_e32 v71, s51, v45
v_or_b32_e32 v70, v71, v70
v_mad_f32 v71, v85, v45, v45
v_cndmask_b32_e64 v70, v70, v71, s[8:9]
v_mul_f32_e32 v71, 0x3f8375d4, v45
v_mac_f32_e32 v71, 0x41000000, v45
v_mul_f32_e32 v71, 0x3e000000, v71
v_cndmask_b32_e64 v70, v70, v71, s[12:13]
v_cndmask_b32_e32 v45, v70, v45, vcc
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v70, s19, v74
v_mad_f32 v45, v73, v45, -v70
v_mul_f32_e32 v70, v79, v81
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v82, v70
v_mad_f32 v45, v51, v69, -v45
v_mul_f32_e32 v51, v69, v51
v_mad_f32 v51, v70, v82, -v51
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
v_mul_f32_e64 v74, v45, -v35
v_mul_f32_e64 v71, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB6_18: ; %Flow1253
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[34:35]
BB6_19: ; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[30:31]
v_lshrrev_b32_e32 v27, 1, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_23
s_cbranch_execz BB6_23
BB6_20: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_22
s_cbranch_execz BB6_22
BB6_21: ; in Loop: Header=BB6_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v72, v80, v80
v_mov_b32_e32 v73, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v73, v73, v72, 0x3ded3cb2
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v82, v73, v72, 1.0
v_mov_b32_e32 v73, 0xb2951928
v_madak_f32_e32 v81, v81, v72, 0x3f01e2bc
v_madak_f32_e32 v73, v73, v72, 0xb85ffb93
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v72, 0x3a83ca0c
v_madak_f32_e32 v73, v73, v72, 0xbc9ded90
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 1, v19
v_madak_f32_e32 v81, v81, v72, 0x3d8eaf3b
v_madak_f32_e32 v83, v73, v72, 0xbf409397
ds_read_b64 v[72:73], v56 offset:64
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v85, v84, v73
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v72, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v86, v73
v_mul_f32_e32 v72, v72, v81
v_mac_f32_e32 v72, 0x3daaaaaa, v73
v_rcp_f32_e32 v73, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v72
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v73, v37, v73
v_mul_f32_e32 v72, v70, v80
v_mul_f32_e32 v73, v83, v73
v_mac_f32_e32 v73, v79, v72
v_and_b32_e32 v72, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v72
v_mul_f32_e32 v81, v72, v72
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v72
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v72
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v72
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
v_subrev_f32_e32 v91, v72, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v72, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v72
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v72|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v72
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v72, v83, v72
v_rcp_f32_e32 v72, v72
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v72, v72, v82
v_mad_f32 v72, -v83, v72, 1.0
v_cndmask_b32_e64 v72, 1.0, v72, s[10:11]
v_cndmask_b32_e64 v72, v72, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v72, v81, v72
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v72, v72, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v72, v72, v81, s[12:13]
v_cndmask_b32_e32 v45, v72, v45, vcc
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v73, -v45
v_mul_f32_e32 v51, v73, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB6_22: ; %Flow1252
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[34:35]
BB6_23: ; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[30:31]
v_lshrrev_b32_e32 v27, 2, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_27
s_cbranch_execz BB6_27
BB6_24: ; in Loop: Header=BB6_11 Depth=1
; Runs only for lanes that passed the bit-2 test on v65.
; Loads four dwords from LDS, forms three differences against v75/v76/v77,
; sums their squares into v45 and compares that with a per-lane threshold.
; v23, v55, v63, v75-v77, s[0:1], s[2:3] and s22 are set outside this view.
s_mov_b32 m0, -1
; ds_read2_b64 reads two 64-bit values; offset0/offset1 are in units of
; 8 bytes, so this fetches 16 contiguous bytes at v55 + 256 into v79..v82.
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; Scalar lane mask, interleaved with the vector math below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v63 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before touching v79..v82.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v76, v27 = v79 - v75, v35 = v81 - v77.
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
; v45 = v31^2 + v27^2 + v35^2 (v_mac accumulates into its destination).
v_mul_f32_e32 v45, v31, v31
; v51 = s22 where the lane mask is set, 0 elsewhere. The sum of squares is
; never below 0, so lanes with a clear mask bit always fail the compare below.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Restrict exec to lanes with v45 < v51; s[34:35] keeps the lanes switched
; off here so BB6_26 can OR them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_26
s_cbranch_execz BB6_26
BB6_25: ; in Loop: Header=BB6_11 Depth=1
; Straight-line body for lanes that passed the v45 < v51 test in BB6_24.
; Inputs from BB6_24: v45 (sum of squared differences), v27/v31/v35 (the
; three differences), v82 (fourth loaded dword).
; Inputs set outside this view: v9, v19, v37, v56, v67, v68, v78, s18, s19,
; s23, s26, s27, s50, s51.
; Accumulators updated: v8, v5, v74/v71/v69 and v34/v33/v32.
; The compiler has interleaved several independent computations; the
; comments describe each group of lines that follows them.
;
; Clamp v45 from below at 0x34cd15ae (~3.82e-7), then v80 = v9*v45 and
; v72 = v80^2 feed two polynomials built with v_madak (d = a*b + K):
; v82 = 1.0 + ... (later used as a divisor) and v83 (the dividend).
; v51 = v78*v82 is taken before v82 is reused. v79 = rsq(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v72, v80, v80
v_mov_b32_e32 v73, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v73, v73, v72, 0x3ded3cb2
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v82, v73, v72, 1.0
v_mov_b32_e32 v73, 0xb2951928
v_madak_f32_e32 v81, v81, v72, 0x3f01e2bc
v_madak_f32_e32 v73, v73, v72, 0xb85ffb93
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v72, 0x3a83ca0c
v_madak_f32_e32 v73, v73, v72, 0xbc9ded90
s_mov_b32 m0, -1
; v70 = 1.0 if bit 2 of v19 is set, else 0 (shift / and / compare /
; cndmask, interleaved with the polynomial tail and the LDS read).
v_lshrrev_b32_e32 v70, 2, v19
v_madak_f32_e32 v81, v81, v72, 0x3d8eaf3b
v_madak_f32_e32 v83, v73, v72, 0xbf409397
; Load one 64-bit LDS value at v56 + 128 into v72 (low) / v73 (high).
ds_read_b64 v[72:73], v56 offset:128
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v70*v80^2, v84 = v70*v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the LDS read, then combine the loaded pair with v67/v68:
;   v85 = v72*v67 - v84*(v68*v73)
;   v8 += v70 * ( (v67*v72) * (-1/6)*(v84 + s23)
;               + 0x3daaaaaa * (v84^2 + s26) * (v68*v73) )   ; 0x3daaaaaa ~ 1/12
; NOTE(review): the 1/6 and 1/12 factors on 6th/12th powers of rsq look like
; a Lennard-Jones style term - confirm against the kernel source.
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v85, v84, v73
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v72, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v86, v73
v_mul_f32_e32 v72, v72, v81
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v73 = v37 * v83 / v82 + v70 * v79^3 (rcp of the divisor polynomial, then
; multiply-accumulate). v45 = s18 * v45 * rsq(v45) becomes the argument x
; of the piecewise function evaluated next.
v_rcp_f32_e32 v73, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v72
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v73, v37, v73
v_mul_f32_e32 v72, v70, v80
v_mul_f32_e32 v73, v83, v73
v_mac_f32_e32 v73, v79, v72
; Piecewise evaluation on v72 = v45 & s27 (presumably |x|; s27 is set
; outside this view - confirm). Range masks:
;   s[4:5] : v72 < 1.25      (0x3fa00000)
;   s[8:9] : v72 < 0.84375   (0x3f580000)
;   vcc    : v72 < ~2.857    (0x4036db6e)
; Polynomial argument v81 = v72^2 if s[8:9], else v72 - 1 if s[4:5],
; else 1/v72^2.
; NOTE(review): thresholds and constants match the fdlibm-style
; single-precision erf algorithm, so this is presumably an inlined erf() -
; confirm against the kernel source.
v_and_b32_e32 v72, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v72
v_mul_f32_e32 v81, v72, v72
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v72
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v72
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Horner chains in v81 for each range. Dividend candidates are merged into
; v82 and divisor candidates into v83 with v_cndmask on vcc, s[4:5], s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v72
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Divisor v83 = v81*v83 + 1.0. Guarded reciprocal: v89 = 2^-32 (0x2f800000)
; when |v83| > 2^96 (0x6f800000), else 1.0; the divisor is multiplied by v89
; before v_rcp and the quotient is multiplied by v89 again afterwards (v96).
; v90 = v45 & s50 and v91 = -v90*v90 - 0.5625 (0xbf100000) is the argument
; of the first exponential. s50 is set outside this view.
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First exponential, on v91: k = int(v91*0x3fb8aa3b +/- 0.5) with
; 0x3fb8aa3b ~ log2(e); remainder via the split constants 0xbf317180 and
; 0xb717f7d1 (~ -ln 2 high/low parts); polynomial in the remainder; k << 23
; is integer-added to the result bits. The result is forced to 0 below
; 0xc2aeac4f, to +inf (0x7f800000) at or above 0x42b17218, and a NaN
; argument is passed through. The small-range dividend chain (v86 -> v81,
; selected by s[8:9]) and the scaled reciprocal are interleaved here.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential, same scheme, on
;   v90 = (v72 + v90) * (v90 - v72) + v96,  v96 = v81*v89 (the quotient).
; v81 = v89*v81 + 0x3f58560b is kept for the v72 < 1.25 range.
v_subrev_f32_e32 v91, v72, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v72, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials. Final selection, later selects
; overriding earlier ones:
;   default          : 1.0
;   v72 < 6.0        : 1 - v82 / v72     (s[10:11], guarded reciprocal)
;   v72 < 1.25       : v81               (s[4:5])
;   then OR in the bits of v45 selected by s51 (presumably the sign bit;
;   s51 is set outside this view - confirm)
;   v72 < 0.84375    : v45 + v45*v96     (s[8:9])
;   v72 < 2^-28      : 0.125 * (8*v45 + 0x3f8375d4*v45)   (s[12:13])
;   v45 is NaN       : v45
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v72
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v72|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v72
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v72, v83, v72
v_rcp_f32_e32 v72, v72
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v72, v72, v82
v_mad_f32 v72, -v83, v72, 1.0
v_cndmask_b32_e64 v72, 1.0, v72, s[10:11]
v_cndmask_b32_e64 v72, v72, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v72, v81, v72
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v72, v72, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v72, v72, v81, s[12:13]
v_cndmask_b32_e32 v45, v72, v45, vcc
; Accumulate results (f = function value now in v45):
;   v5 += v51 * ( v79*(v70 - f) - s19*v70 )
;   w   = v51*v73 - v85*(v80*v84)          -> v45
;   -w  = (v80*v84)*v85 - v73*v51          -> v51
;   v74 += v35*(-w); v71 += v31*(-w); v69 += v27*(-w)
;   v34 += v35*w;    v33 += v31*w;    v32 += v27*w
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v73, -v45
v_mul_f32_e32 v51, v73, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB6_26: ; %Flow1251
; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane set held in s[34:35] back into exec.
s_or_b64 exec, exec, s[34:35]
BB6_27: ; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane set held in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
; Per-lane test of bit 3 of v65: v27 = (v65 >> 3) & 1, vcc = (v27 == 1).
v_lshrrev_b32_e32 v27, 3, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[30:31] = old exec XOR new exec, i.e. the
; lanes switched off by this test, to be OR-ed back into exec at BB6_31.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_31
; Skip straight to BB6_31 when no lane has the bit set (exec == 0).
s_cbranch_execz BB6_31
BB6_28: ; in Loop: Header=BB6_11 Depth=1
; Runs only for lanes that passed the bit-3 test on v65.
; Loads four dwords from LDS, forms three differences against v75/v76/v77,
; sums their squares into v45 and compares that with a per-lane threshold.
; v23, v55, v62, v75-v77, s[0:1], s[2:3] and s22 are set outside this view.
s_mov_b32 m0, -1
; ds_read2_b64 reads two 64-bit values; offset0/offset1 are in units of
; 8 bytes, so this fetches 16 contiguous bytes at v55 + 384 into v79..v82.
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; Scalar lane mask, interleaved with the vector math below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v62 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before touching v79..v82.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v76, v27 = v79 - v75, v35 = v81 - v77.
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
; v45 = v31^2 + v27^2 + v35^2 (v_mac accumulates into its destination).
v_mul_f32_e32 v45, v31, v31
; v51 = s22 where the lane mask is set, 0 elsewhere. The sum of squares is
; never below 0, so lanes with a clear mask bit always fail the compare below.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Restrict exec to lanes with v45 < v51; s[34:35] keeps the lanes switched
; off here so BB6_30 can OR them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_30
s_cbranch_execz BB6_30
BB6_29: ; in Loop: Header=BB6_11 Depth=1
; Straight-line body for lanes that passed the v45 < v51 test in BB6_28.
; Inputs from BB6_28: v45 (sum of squared differences), v27/v31/v35 (the
; three differences), v82 (fourth loaded dword).
; Inputs set outside this view: v9, v19, v37, v56, v67, v68, v78, s18, s19,
; s23, s26, s27, s50, s51.
; Accumulators updated: v8, v5, v74/v71/v69 and v30/v29/v28.
; The compiler has interleaved several independent computations; the
; comments describe each group of lines that follows them.
;
; Clamp v45 from below at 0x34cd15ae (~3.82e-7), then v80 = v9*v45 and
; v72 = v80^2 feed two polynomials built with v_madak (d = a*b + K):
; v82 = 1.0 + ... (later used as a divisor) and v83 (the dividend).
; v51 = v78*v82 is taken before v82 is reused. v79 = rsq(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v72, v80, v80
v_mov_b32_e32 v73, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v73, v73, v72, 0x3ded3cb2
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v82, v73, v72, 1.0
v_mov_b32_e32 v73, 0xb2951928
v_madak_f32_e32 v81, v81, v72, 0x3f01e2bc
v_madak_f32_e32 v73, v73, v72, 0xb85ffb93
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v72, 0x3a83ca0c
v_madak_f32_e32 v73, v73, v72, 0xbc9ded90
s_mov_b32 m0, -1
; v70 = 1.0 if bit 3 of v19 is set, else 0 (shift / and / compare /
; cndmask, interleaved with the polynomial tail and the LDS read).
v_lshrrev_b32_e32 v70, 3, v19
v_madak_f32_e32 v81, v81, v72, 0x3d8eaf3b
v_madak_f32_e32 v83, v73, v72, 0xbf409397
; Load one 64-bit LDS value at v56 + 192 into v72 (low) / v73 (high).
ds_read_b64 v[72:73], v56 offset:192
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v70*v80^2, v84 = v70*v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the LDS read, then combine the loaded pair with v67/v68:
;   v85 = v72*v67 - v84*(v68*v73)
;   v8 += v70 * ( (v67*v72) * (-1/6)*(v84 + s23)
;               + 0x3daaaaaa * (v84^2 + s26) * (v68*v73) )   ; 0x3daaaaaa ~ 1/12
; NOTE(review): the 1/6 and 1/12 factors on 6th/12th powers of rsq look like
; a Lennard-Jones style term - confirm against the kernel source.
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v85, v84, v73
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v72, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v86, v73
v_mul_f32_e32 v72, v72, v81
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v73 = v37 * v83 / v82 + v70 * v79^3 (rcp of the divisor polynomial, then
; multiply-accumulate). v45 = s18 * v45 * rsq(v45) becomes the argument x
; of the piecewise function evaluated next.
v_rcp_f32_e32 v73, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v72
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v73, v37, v73
v_mul_f32_e32 v72, v70, v80
v_mul_f32_e32 v73, v83, v73
v_mac_f32_e32 v73, v79, v72
; Piecewise evaluation on v72 = v45 & s27 (presumably |x|; s27 is set
; outside this view - confirm). Range masks:
;   s[4:5] : v72 < 1.25      (0x3fa00000)
;   s[8:9] : v72 < 0.84375   (0x3f580000)
;   vcc    : v72 < ~2.857    (0x4036db6e)
; Polynomial argument v81 = v72^2 if s[8:9], else v72 - 1 if s[4:5],
; else 1/v72^2.
; NOTE(review): thresholds and constants match the fdlibm-style
; single-precision erf algorithm, so this is presumably an inlined erf() -
; confirm against the kernel source.
v_and_b32_e32 v72, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v72
v_mul_f32_e32 v81, v72, v72
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v72
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v72
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Horner chains in v81 for each range. Dividend candidates are merged into
; v82 and divisor candidates into v83 with v_cndmask on vcc, s[4:5], s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v72
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Divisor v83 = v81*v83 + 1.0. Guarded reciprocal: v89 = 2^-32 (0x2f800000)
; when |v83| > 2^96 (0x6f800000), else 1.0; the divisor is multiplied by v89
; before v_rcp and the quotient is multiplied by v89 again afterwards (v96).
; v90 = v45 & s50 and v91 = -v90*v90 - 0.5625 (0xbf100000) is the argument
; of the first exponential. s50 is set outside this view.
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First exponential, on v91: k = int(v91*0x3fb8aa3b +/- 0.5) with
; 0x3fb8aa3b ~ log2(e); remainder via the split constants 0xbf317180 and
; 0xb717f7d1 (~ -ln 2 high/low parts); polynomial in the remainder; k << 23
; is integer-added to the result bits. The result is forced to 0 below
; 0xc2aeac4f, to +inf (0x7f800000) at or above 0x42b17218, and a NaN
; argument is passed through. The small-range dividend chain (v86 -> v81,
; selected by s[8:9]) and the scaled reciprocal are interleaved here.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential, same scheme, on
;   v90 = (v72 + v90) * (v90 - v72) + v96,  v96 = v81*v89 (the quotient).
; v81 = v89*v81 + 0x3f58560b is kept for the v72 < 1.25 range.
v_subrev_f32_e32 v91, v72, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v72, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials. Final selection, later selects
; overriding earlier ones:
;   default          : 1.0
;   v72 < 6.0        : 1 - v82 / v72     (s[10:11], guarded reciprocal)
;   v72 < 1.25       : v81               (s[4:5])
;   then OR in the bits of v45 selected by s51 (presumably the sign bit;
;   s51 is set outside this view - confirm)
;   v72 < 0.84375    : v45 + v45*v96     (s[8:9])
;   v72 < 2^-28      : 0.125 * (8*v45 + 0x3f8375d4*v45)   (s[12:13])
;   v45 is NaN       : v45
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v72
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v72|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v72
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v72, v83, v72
v_rcp_f32_e32 v72, v72
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v72, v72, v82
v_mad_f32 v72, -v83, v72, 1.0
v_cndmask_b32_e64 v72, 1.0, v72, s[10:11]
v_cndmask_b32_e64 v72, v72, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v72, v81, v72
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v72, v72, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v72, v72, v81, s[12:13]
v_cndmask_b32_e32 v45, v72, v45, vcc
; Accumulate results (f = function value now in v45):
;   v5 += v51 * ( v79*(v70 - f) - s19*v70 )
;   w   = v51*v73 - v85*(v80*v84)          -> v45
;   -w  = (v80*v84)*v85 - v73*v51          -> v51
;   v74 += v35*(-w); v71 += v31*(-w); v69 += v27*(-w)
;   v30 += v35*w;    v29 += v31*w;    v28 += v27*w
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v73, -v45
v_mul_f32_e32 v51, v73, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
BB6_30: ; %Flow1250
; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane set held in s[34:35] back into exec.
s_or_b64 exec, exec, s[34:35]
BB6_31: ; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane set held in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
; Per-lane test of bit 4 of v65: v27 = (v65 >> 4) & 1, vcc = (v27 == 1).
v_lshrrev_b32_e32 v27, 4, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[30:31] = old exec XOR new exec, i.e. the
; lanes switched off by this test, to be OR-ed back into exec at BB6_35.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_35
; Skip straight to BB6_35 when no lane has the bit set (exec == 0).
s_cbranch_execz BB6_35
BB6_32: ; in Loop: Header=BB6_11 Depth=1
; Runs only for lanes that passed the bit-4 test on v65.
; Loads four dwords from LDS, forms three differences against v75/v76/v77,
; sums their squares into v45 and compares that with a per-lane threshold.
; v23, v55, v61, v75-v77, s[0:1], s[2:3] and s22 are set outside this view.
s_mov_b32 m0, -1
; ds_read2_b64 reads two 64-bit values; offset0/offset1 are in units of
; 8 bytes, so this fetches 16 contiguous bytes at v55 + 512 into v79..v82.
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; Scalar lane mask, interleaved with the vector math below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v61 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before touching v79..v82.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v76, v27 = v79 - v75, v35 = v81 - v77.
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
; v45 = v31^2 + v27^2 + v35^2 (v_mac accumulates into its destination).
v_mul_f32_e32 v45, v31, v31
; v51 = s22 where the lane mask is set, 0 elsewhere. The sum of squares is
; never below 0, so lanes with a clear mask bit always fail the compare below.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Restrict exec to lanes with v45 < v51; s[34:35] keeps the lanes switched
; off here so BB6_34 can OR them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_34
s_cbranch_execz BB6_34
BB6_33: ; in Loop: Header=BB6_11 Depth=1
; Straight-line body for lanes that passed the v45 < v51 test in BB6_32.
; Inputs from BB6_32: v45 (sum of squared differences), v27/v31/v35 (the
; three differences), v82 (fourth loaded dword).
; Inputs set outside this view: v9, v19, v37, v56, v67, v68, v78, s18, s19,
; s23, s26, s27, s50, s51.
; Accumulators updated: v8, v5, v74/v71/v69 and v26/v25/v24.
; The compiler has interleaved several independent computations; the
; comments describe each group of lines that follows them.
;
; Clamp v45 from below at 0x34cd15ae (~3.82e-7), then v80 = v9*v45 and
; v72 = v80^2 feed two polynomials built with v_madak (d = a*b + K):
; v82 = 1.0 + ... (later used as a divisor) and v83 (the dividend).
; v51 = v78*v82 is taken before v82 is reused. v79 = rsq(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v72, v80, v80
v_mov_b32_e32 v73, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v73, v73, v72, 0x3ded3cb2
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v82, v73, v72, 1.0
v_mov_b32_e32 v73, 0xb2951928
v_madak_f32_e32 v81, v81, v72, 0x3f01e2bc
v_madak_f32_e32 v73, v73, v72, 0xb85ffb93
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v72, 0x3a83ca0c
v_madak_f32_e32 v73, v73, v72, 0xbc9ded90
s_mov_b32 m0, -1
; v70 = 1.0 if bit 4 of v19 is set, else 0 (shift / and / compare /
; cndmask, interleaved with the polynomial tail and the LDS read).
v_lshrrev_b32_e32 v70, 4, v19
v_madak_f32_e32 v81, v81, v72, 0x3d8eaf3b
v_madak_f32_e32 v83, v73, v72, 0xbf409397
; Load one 64-bit LDS value at v56 + 256 into v72 (low) / v73 (high).
ds_read_b64 v[72:73], v56 offset:256
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v70*v80^2, v84 = v70*v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the LDS read, then combine the loaded pair with v67/v68:
;   v85 = v72*v67 - v84*(v68*v73)
;   v8 += v70 * ( (v67*v72) * (-1/6)*(v84 + s23)
;               + 0x3daaaaaa * (v84^2 + s26) * (v68*v73) )   ; 0x3daaaaaa ~ 1/12
; NOTE(review): the 1/6 and 1/12 factors on 6th/12th powers of rsq look like
; a Lennard-Jones style term - confirm against the kernel source.
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v85, v84, v73
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v72, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v86, v73
v_mul_f32_e32 v72, v72, v81
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v73 = v37 * v83 / v82 + v70 * v79^3 (rcp of the divisor polynomial, then
; multiply-accumulate). v45 = s18 * v45 * rsq(v45) becomes the argument x
; of the piecewise function evaluated next.
v_rcp_f32_e32 v73, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v72
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v73, v37, v73
v_mul_f32_e32 v72, v70, v80
v_mul_f32_e32 v73, v83, v73
v_mac_f32_e32 v73, v79, v72
; Piecewise evaluation on v72 = v45 & s27 (presumably |x|; s27 is set
; outside this view - confirm). Range masks:
;   s[4:5] : v72 < 1.25      (0x3fa00000)
;   s[8:9] : v72 < 0.84375   (0x3f580000)
;   vcc    : v72 < ~2.857    (0x4036db6e)
; Polynomial argument v81 = v72^2 if s[8:9], else v72 - 1 if s[4:5],
; else 1/v72^2.
; NOTE(review): thresholds and constants match the fdlibm-style
; single-precision erf algorithm, so this is presumably an inlined erf() -
; confirm against the kernel source.
v_and_b32_e32 v72, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v72
v_mul_f32_e32 v81, v72, v72
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v72
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v72
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Horner chains in v81 for each range. Dividend candidates are merged into
; v82 and divisor candidates into v83 with v_cndmask on vcc, s[4:5], s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v72
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Divisor v83 = v81*v83 + 1.0. Guarded reciprocal: v89 = 2^-32 (0x2f800000)
; when |v83| > 2^96 (0x6f800000), else 1.0; the divisor is multiplied by v89
; before v_rcp and the quotient is multiplied by v89 again afterwards (v96).
; v90 = v45 & s50 and v91 = -v90*v90 - 0.5625 (0xbf100000) is the argument
; of the first exponential. s50 is set outside this view.
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First exponential, on v91: k = int(v91*0x3fb8aa3b +/- 0.5) with
; 0x3fb8aa3b ~ log2(e); remainder via the split constants 0xbf317180 and
; 0xb717f7d1 (~ -ln 2 high/low parts); polynomial in the remainder; k << 23
; is integer-added to the result bits. The result is forced to 0 below
; 0xc2aeac4f, to +inf (0x7f800000) at or above 0x42b17218, and a NaN
; argument is passed through. The small-range dividend chain (v86 -> v81,
; selected by s[8:9]) and the scaled reciprocal are interleaved here.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential, same scheme, on
;   v90 = (v72 + v90) * (v90 - v72) + v96,  v96 = v81*v89 (the quotient).
; v81 = v89*v81 + 0x3f58560b is kept for the v72 < 1.25 range.
v_subrev_f32_e32 v91, v72, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v72, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials. Final selection, later selects
; overriding earlier ones:
;   default          : 1.0
;   v72 < 6.0        : 1 - v82 / v72     (s[10:11], guarded reciprocal)
;   v72 < 1.25       : v81               (s[4:5])
;   then OR in the bits of v45 selected by s51 (presumably the sign bit;
;   s51 is set outside this view - confirm)
;   v72 < 0.84375    : v45 + v45*v96     (s[8:9])
;   v72 < 2^-28      : 0.125 * (8*v45 + 0x3f8375d4*v45)   (s[12:13])
;   v45 is NaN       : v45
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v72
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v72|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v72
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v72, v83, v72
v_rcp_f32_e32 v72, v72
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v72, v72, v82
v_mad_f32 v72, -v83, v72, 1.0
v_cndmask_b32_e64 v72, 1.0, v72, s[10:11]
v_cndmask_b32_e64 v72, v72, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v72, v81, v72
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v72, v72, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v72, v72, v81, s[12:13]
v_cndmask_b32_e32 v45, v72, v45, vcc
; Accumulate results (f = function value now in v45):
;   v5 += v51 * ( v79*(v70 - f) - s19*v70 )
;   w   = v51*v73 - v85*(v80*v84)          -> v45
;   -w  = (v80*v84)*v85 - v73*v51          -> v51
;   v74 += v35*(-w); v71 += v31*(-w); v69 += v27*(-w)
;   v26 += v35*w;    v25 += v31*w;    v24 += v27*w
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v73, -v45
v_mul_f32_e32 v51, v73, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
BB6_34: ; %Flow1249
; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane set held in s[34:35] back into exec.
s_or_b64 exec, exec, s[34:35]
BB6_35: ; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane set held in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
; Per-lane test of bit 5 of v65: v27 = (v65 >> 5) & 1, vcc = (v27 == 1).
v_lshrrev_b32_e32 v27, 5, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[30:31] = old exec XOR new exec, i.e. the
; lanes switched off by this test, to be OR-ed back into exec at BB6_39.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_39
; Skip straight to BB6_39 when no lane has the bit set (exec == 0).
s_cbranch_execz BB6_39
BB6_36: ; in Loop: Header=BB6_11 Depth=1
; Runs only for lanes that passed the bit-5 test on v65.
; Loads four dwords from LDS, forms three differences against v75/v76/v77,
; sums their squares into v45 and compares that with a per-lane threshold.
; v23, v55, v60, v75-v77, s[0:1], s[2:3] and s22 are set outside this view.
s_mov_b32 m0, -1
; ds_read2_b64 reads two 64-bit values; offset0/offset1 are in units of
; 8 bytes, so this fetches 16 contiguous bytes at v55 + 640 into v79..v82.
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; Scalar lane mask, interleaved with the vector math below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v60 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before touching v79..v82.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v76, v27 = v79 - v75, v35 = v81 - v77.
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
; v45 = v31^2 + v27^2 + v35^2 (v_mac accumulates into its destination).
v_mul_f32_e32 v45, v31, v31
; v51 = s22 where the lane mask is set, 0 elsewhere. The sum of squares is
; never below 0, so lanes with a clear mask bit always fail the compare below.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Restrict exec to lanes with v45 < v51; s[34:35] keeps the lanes switched
; off here so BB6_38 can OR them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_38
s_cbranch_execz BB6_38
BB6_37: ; in Loop: Header=BB6_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v72, v80, v80
v_mov_b32_e32 v73, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v73, v73, v72, 0x3ded3cb2
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v82, v73, v72, 1.0
v_mov_b32_e32 v73, 0xb2951928
v_madak_f32_e32 v81, v81, v72, 0x3f01e2bc
v_madak_f32_e32 v73, v73, v72, 0xb85ffb93
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v72, 0x3a83ca0c
v_madak_f32_e32 v73, v73, v72, 0xbc9ded90
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 5, v19
v_madak_f32_e32 v81, v81, v72, 0x3d8eaf3b
v_madak_f32_e32 v83, v73, v72, 0xbf409397
ds_read_b64 v[72:73], v56 offset:320
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v85, v84, v73
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v72, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v86, v73
v_mul_f32_e32 v72, v72, v81
v_mac_f32_e32 v72, 0x3daaaaaa, v73
v_rcp_f32_e32 v73, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v72
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v73, v37, v73
v_mul_f32_e32 v72, v70, v80
v_mul_f32_e32 v73, v83, v73
v_mac_f32_e32 v73, v79, v72
v_and_b32_e32 v72, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v72
v_mul_f32_e32 v81, v72, v72
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v72
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v72
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v72
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
v_subrev_f32_e32 v91, v72, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v72, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v72
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v72|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v72
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v72, v83, v72
v_rcp_f32_e32 v72, v72
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v72, v72, v82
v_mad_f32 v72, -v83, v72, 1.0
v_cndmask_b32_e64 v72, 1.0, v72, s[10:11]
v_cndmask_b32_e64 v72, v72, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v72, v81, v72
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v72, v72, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v72, v72, v81, s[12:13]
v_cndmask_b32_e32 v45, v72, v45, vcc
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v73, -v45
v_mul_f32_e32 v51, v73, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v22, v35, v45, v22
v_mad_f32 v21, v31, v45, v21
v_mac_f32_e32 v20, v27, v45
BB6_38: ; %Flow1248
; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane mask held in s[34:35] back into exec, re-enabling
; lanes that an earlier exec-masked region switched off. The instruction
; that filled s[34:35] lies above this label.
; NOTE(review): presumably the squared-distance test of the preceding
; section, by analogy with the BB6_40/BB6_42 pair -- confirm.
s_or_b64 exec, exec, s[34:35]
BB6_39: ; in Loop: Header=BB6_11 Depth=1
; Closes the previous exec-masked region (mask in s[30:31]) and opens a new
; one that keeps only the lanes in which bit 6 of v65 is set.
s_or_b64 exec, exec, s[30:31]
; v27 = (v65 >> 6) & 1 ; vcc = lanes where that bit equals 1
v_lshrrev_b32_e32 v27, 6, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec, exec &= vcc ; s[30:31] = lanes just switched off
; (OR-ed back into exec at BB6_43)
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_43
; Skip the whole section when no lane is left active.
s_cbranch_execz BB6_43
BB6_40: ; in Loop: Header=BB6_11 Depth=1
; Reads four dwords from LDS, forms a 3-component difference vector against
; v75/v76/v77, and keeps only lanes whose squared length is below a per-lane
; limit. Surviving lanes fall through to BB6_41; the others wait at BB6_42.
; m0 is set to -1 ahead of the LDS (ds_*) access.
s_mov_b32 m0, -1
; v[79:82] = two consecutive 64-bit words at v55 + 96*8 and v55 + 97*8 bytes
ds_read2_b64 v[79:82], v55 offset0:96 offset1:97
; Build lane mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v59 != v23)
; NOTE(review): looks like an exclusion / self-pair filter -- confirm
; the meaning of s[0:1], s[2:3], v59 and v23 against the kernel source.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v59, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; wait for the LDS read before touching v79..v82
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v76
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v75
v_subrev_f32_e32 v27, v75, v79
; v45 accumulates v31^2 + v27^2 + v35^2 (squared length of the difference)
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 in lanes selected by the mask, 0 elsewhere
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v77
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; limit = s22 in selected lanes, 0 otherwise
; NOTE(review): s22 is presumably the squared cut-off radius -- confirm.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = old exec, exec &= vcc ; s[34:35] = lanes switched off here
; (OR-ed back into exec at BB6_42)
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_42
s_cbranch_execz BB6_42
BB6_41: ; in Loop: Header=BB6_11 Depth=1
; Pair-interaction body for the section guarded by bit 6 of v65; runs only in
; lanes that passed the squared-distance test of BB6_40.
; Reads:  v45 (squared distance), v27/v31/v35 (difference vector), v78 and
;         v82 (their product becomes v51), v67/v68, v19 (bit 6), v9, v37,
;         s18/s19/s23/s26, s27/s50/s51 (used as bit masks), and the 64-bit
;         LDS word at v56+384.
; Writes: v5 and v8 (scalar accumulators), v69/v71/v74 and v16/v17/v18
;         (vector accumulators). v45, v51, v70, v72, v73 and v79..v103 are
;         used as scratch.
; NOTE(review): constants and structure look like an Ewald-electrostatics +
; Lennard-Jones force/energy step with an inlined erf(); every physical name
; used in the comments below is inferred from that -- confirm against the
; kernel source.
;
; Clamp the squared distance from below (0x34cd15ae ~= 3.8e-7).
v_max_f32_e32 v45, 0x34cd15ae, v45
; --- Rational polynomial in z = v9 * v45 (v80 = z, v72 = z^2) ---------------
; Denominator is built in v82, numerator in v83; each is an even part in z^2
; plus z times an odd part. The instructions are interleaved with the
; reciprocal square root and the v19 bit test by the scheduler.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v72, v80, v80
v_mov_b32_e32 v73, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v73, v73, v72, 0x3ded3cb2
; v51 = v78 * v82, taken while v82 still holds the 4th dword read in BB6_40
; NOTE(review): presumably the product of the two charges -- confirm.
v_mul_f32_e32 v51, v78, v82
v_mad_f32 v82, v73, v72, 1.0
v_mov_b32_e32 v73, 0xb2951928
v_madak_f32_e32 v81, v81, v72, 0x3f01e2bc
v_madak_f32_e32 v73, v73, v72, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v72, 0x3a83ca0c
v_madak_f32_e32 v73, v73, v72, 0xbc9ded90
s_mov_b32 m0, -1
; v70 = (v19 >> 6) & 1, turned into 0.0 / 1.0 a few lines below
v_lshrrev_b32_e32 v70, 6, v19
v_madak_f32_e32 v81, v81, v72, 0x3d8eaf3b
v_madak_f32_e32 v83, v73, v72, 0xbf409397
; v[72:73] = 64-bit LDS word at v56 + 384 (valid after the s_waitcnt below);
; the polynomial no longer needs the old v72/v73 at this point.
ds_read_b64 v[72:73], v56 offset:384
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; --- Inverse powers of the distance ------------------------------------------
; v80 = v79^2, v81 = v70 * v80^2, v84 = v70 * v80^3
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
; --- Terms built from v67/v68 and the LDS pair v72/v73 -----------------------
; v73 = v68*v73 ; v85 = v67*v72 - v84*v73 (used for the vector result below)
; v72 = v67*v72*(-1/6)*(v84 + s23) + (1/12)*v73*(v84^2 + s26)
; v8 += v70 * v72
; NOTE(review): matches a shifted Lennard-Jones energy with c6 = v67*v72 and
; c12 = v68*v73 -- confirm.
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v85, v84, v73
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v72, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v86, v73
v_mul_f32_e32 v72, v72, v81
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v73 = v37 * (v83 / v82) + v70 * v79^3 ; v45 = s18 * v45 * v79
v_rcp_f32_e32 v73, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v72
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v73, v37, v73
v_mul_f32_e32 v72, v70, v80
v_mul_f32_e32 v73, v83, v73
v_mac_f32_e32 v73, v79, v72
; --- Piecewise evaluation of a function of x = v45 ---------------------------
; NOTE(review): thresholds (0.84375, 1.25, ~2.857, 6.0, 2^-28) and the final
; selects match the fdlibm-style single-precision erf(x) -- confirm.
; v72 = x & s27 (NOTE(review): presumably |x|, i.e. s27 = 0x7fffffff).
v_and_b32_e32 v72, s27, v45
; s[4:5] = (1.25 > v72)
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v72
; Polynomial argument v81: 1/v72^2 by default, v72 - 1 when s[4:5] is set,
; v72^2 when s[8:9] = (0.84375 > v72) is set.
v_mul_f32_e32 v81, v72, v72
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v72
v_mov_b32_e32 v86, 0xbd777f97
; v87 = 2^96, threshold used by the scaled-reciprocal sequences below
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v72
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Two numerator polynomials are evaluated in v82 and v83 and two denominator
; polynomials in v83 (reused) and v86; vcc = (0x4036db6e ~= 2.857 > v72)
; picks between the members of each pair.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v72
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
; v82 = selected numerator
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
; v83 = selected denominator (without its leading 1.0)
v_cndmask_b32_e32 v83, v83, v86, vcc
; Numerator for the s[4:5] (v72 < 1.25) range, replaces v82 in those lanes
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
; Denominator for the s[4:5] range
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
; Denominator for the s[8:9] (v72 < 0.84375) range
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; v83 = 1.0 + v81 * (selected denominator polynomial)
v_mad_f32 v83, v81, v83, 1.0
; --- First exponential: argument v91 = -(v90^2) - 0.5625, v90 = x & s50 ------
; NOTE(review): s50 presumably clears low mantissa bits of x -- confirm.
; The exp code is interleaved with the remaining numerator polynomial and
; with the division numerator/denominator.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
; Scaled reciprocal: when |v83| > 2^96 the divisor is pre-multiplied by
; v88 = 2^-32 (scale factor kept in v89) before v_rcp.
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; v92 = (int)(v91 * 0x3fb8aa3b(~1.4427) + (v91 < 0 ? -0.5 : 0.5))
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
; Numerator for the s[8:9] range (result lands in v81 at the last madak)
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
; v81 = numerator * rcp(scaled denominator); still to be multiplied by v89
v_mul_f32_e32 v81, v83, v81
; Range reduction: v83 = v91 + n*0xbf317180 (~-0.69314), v95 = v83 + n*0xb717f7d1
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
; Polynomial in v96 = v95^2, coefficients held in v98, v97, v100, v101, v102
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
; c = v95 - v96*poly ; v95 = v95*c / (2 - c) via another scaled reciprocal
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; Apply the power of two: integer-add (n << 23) onto the float bit pattern
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range guards: 0 when v91 < 0xc2aeac4f (~-87.34), +inf (0x7f800000) when
; v91 >= 0x42b17218 (~88.72), and v91 itself when v91 is NaN (v_cmp_u).
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; --- Second exponential: argument v90 = (v90 + v72)*(v90 - v72) + v96 --------
; v96 = v81 * v89 is the finished quotient numerator/denominator.
v_subrev_f32_e32 v91, v72, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v72, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = quotient + 0x3f58560b (~0.8451): candidate result for the s[4:5] range
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
; Same polynomial as above, accumulated in place into the coefficient
; registers (v97, v100, v101, v102 are destroyed here).
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials
v_mul_f32_e32 v82, v82, v83
; --- Final range selection ---------------------------------------------------
; s[10:11] = (6.0 > v72), s[12:13] = (2^-28 > v72)
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v72
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v72|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v72
; v72 = 1.0 - v82 / v72 (scaled reciprocal again), or 1.0 when v72 >= 6.0
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v72, v83, v72
v_rcp_f32_e32 v72, v72
; vcc = x is NaN; consumed by the last v_cndmask of this section
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v72, v72, v82
v_mad_f32 v72, -v83, v72, 1.0
v_cndmask_b32_e64 v72, 1.0, v72, s[10:11]
; s[4:5] lanes take the v81 candidate computed above
v_cndmask_b32_e64 v72, v72, v81, s[4:5]
; OR in bits of x selected by s51
; NOTE(review): presumably the sign bit (s51 = 0x80000000) -- confirm.
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v72, v81, v72
; s[8:9] lanes: x + x * quotient
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v72, v72, v81, s[8:9]
; s[12:13] lanes: 0.125 * (8.0*x + 0x3f8375d4(~1.0270) * x)
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v72, v72, v81, s[12:13]
; NaN input is passed through unchanged
v_cndmask_b32_e32 v45, v72, v45, vcc
; --- Combine and accumulate --------------------------------------------------
; v45 = v79 * (v70 - f(x)) - s19 * v70 ; v5 += v45 * v51
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v73 - v85*(v80*v84) ; v51 = -v45 (computed independently)
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v73, -v45
v_mul_f32_e32 v51, v73, v51
v_mad_f32 v51, v70, v85, -v51
; Scale the difference vector (v27, v31, v35): the v51 products go into
; v69/v71/v74, the v45 products into v16/v17/v18.
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v18, v35, v45, v18
v_mad_f32 v17, v31, v45, v17
v_mac_f32_e32 v16, v27, v45
BB6_42: ; %Flow1247
; in Loop: Header=BB6_11 Depth=1
; Join point for the squared-distance test in BB6_40: OR the lanes saved in
; s[34:35] back into exec.
s_or_b64 exec, exec, s[34:35]
BB6_43: ; in Loop: Header=BB6_11 Depth=1
; Closes the bit-6 region opened in BB6_39 (mask in s[30:31]) and opens the
; next one, which keeps only lanes in which bit 7 of v65 is set.
s_or_b64 exec, exec, s[30:31]
; v27 = (v65 >> 7) & 1 ; vcc = lanes where that bit equals 1
v_lshrrev_b32_e32 v27, 7, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec, exec &= vcc ; s[12:13] = lanes just switched off
; (OR-ed back into exec at BB6_47)
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB6_47
; Skip the whole section when no lane is left active.
s_cbranch_execz BB6_47
BB6_44: ; in Loop: Header=BB6_11 Depth=1
; Same shape as BB6_40, for the bit-7 section: reads four dwords from LDS,
; forms the difference vector against v75/v76/v77 and keeps only lanes whose
; squared length is below a per-lane limit. Survivors fall through to BB6_45.
; m0 is set to -1 ahead of the LDS (ds_*) access.
s_mov_b32 m0, -1
; v[79:82] = two consecutive 64-bit words at v55 + 112*8 and v55 + 113*8 bytes
ds_read2_b64 v[79:82], v55 offset0:112 offset1:113
; Build lane mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v38 != v23)
; NOTE(review): looks like an exclusion / self-pair filter -- confirm.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v38, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; wait for the LDS read before touching v79..v82
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v27 = v80 - v76
v_subrev_f32_e32 v27, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v23 = v79 - v75. This overwrites the v23 value compared above; it has
; already been consumed by the v_cmp_ne.
v_subrev_f32_e32 v23, v75, v79
; v35 accumulates v27^2 + v23^2 + v31^2 (squared length of the difference)
v_mul_f32_e32 v35, v27, v27
; v45 = 1.0 in lanes selected by the mask, 0 elsewhere
v_cndmask_b32_e64 v45, 0, 1.0, s[4:5]
; v31 = v81 - v77
v_subrev_f32_e32 v31, v77, v81
v_mac_f32_e32 v35, v23, v23
v_mac_f32_e32 v35, v31, v31
; limit = s22 in selected lanes, 0 otherwise
; NOTE(review): s22 is presumably the squared cut-off radius -- confirm.
v_mul_f32_e32 v45, s22, v45
v_cmp_lt_f32_e32 vcc, v35, v45
; s[4:5] = old exec, exec &= vcc ; s[30:31] = lanes switched off here
; (OR-ed back into exec at BB6_46)
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_46
s_cbranch_execz BB6_46
BB6_45: ; in Loop: Header=BB6_11 Depth=1
; Pair-interaction body for the section guarded by bit 7 of v65; runs only in
; lanes that passed the squared-distance test of BB6_44. It performs the same
; arithmetic as BB6_41 with a different register assignment and a more
; heavily interleaved schedule.
; Reads:  v35 (squared distance), v23/v27/v31 (difference vector), v78 and
;         v82 (their product becomes v45), v67/v68, v19 (bit 7), v9, v37,
;         s18/s19/s23/s26, s27/s50/s51 (used as bit masks), and the 64-bit
;         LDS word at v56+448.
; Writes: v5 and v8 (scalar accumulators), v69/v71/v74 and v13/v14/v15
;         (vector accumulators).
; Note that v67, v68 and v75..v78, which are inputs on entry, are reused as
; scratch here and no longer hold their entry values afterwards.
; NOTE(review): constants and structure look like an Ewald-electrostatics +
; Lennard-Jones force/energy step with an inlined erf(); every physical name
; used in the comments below is inferred from that -- confirm against the
; kernel source.
;
; v70 = ((v19 >> 7) & 1) as 0.0 / 1.0 ; v35 clamped from below
; (0x34cd15ae ~= 3.8e-7) ; v51 = 1 / sqrt(v35)
v_lshrrev_b32_e32 v51, 7, v19
v_and_b32_e32 v51, 1, v51
v_max_f32_e32 v35, 0x34cd15ae, v35
v_cmp_eq_u32_e32 vcc, 1, v51
v_rsq_f32_e32 v51, v35
s_mov_b32 m0, -1
v_cndmask_b32_e64 v70, 0, 1.0, vcc
; v45 = v78 * v82 (v82 is the 4th dword read in BB6_44)
; NOTE(review): presumably the product of the two charges -- confirm.
v_mul_f32_e32 v45, v78, v82
; v75 = v51^2 ; v76 = v70 * v75^2
v_mul_f32_e32 v75, v51, v51
v_mul_f32_e32 v72, v75, v75
v_mul_f32_e32 v76, v70, v72
; v[72:73] = 64-bit LDS word at v56 + 448 (valid after the s_waitcnt below)
ds_read_b64 v[72:73], v56 offset:448
; Rational polynomial in z = v9 * v35 (v78 = z, v79 = z^2): denominator is
; built in v80, numerator in v79 (final madak), each as an even part plus
; z times an odd part. Interleaved with the v67/v68 terms below.
v_mul_f32_e32 v78, v9, v35
v_mul_f32_e32 v79, v78, v78
v_mov_b32_e32 v80, 0x3a92b707
v_madak_f32_e32 v80, v80, v79, 0x3ded3cb2
s_waitcnt lgkmcnt(0)
; Terms built from v67/v68 and the LDS pair v72/v73:
; v68 = v68*v73 ; v73 = v70 * v75^3 ; v77 = v67*v72 - v73*v68
; v67 = v67*v72*(-1/6)*(v73 + s23) + (1/12)*v68*(v73^2 + s26) ; v8 += v70*v67
; NOTE(review): matches a shifted Lennard-Jones energy -- confirm.
v_mul_f32_e32 v68, v68, v73
v_mul_f32_e32 v73, v75, v76
v_mul_f32_e32 v77, v73, v68
v_mad_f32 v77, v72, v67, -v77
v_mul_f32_e32 v67, v67, v72
v_mad_f32 v72, v73, v73, s26
v_mul_f32_e32 v68, v72, v68
v_mad_f32 v72, v76, v75, s23
v_mov_b32_e32 v81, 0x3c739487
v_mul_f32_e32 v72, 0xbe2aaaab, v72
v_mul_f32_e32 v67, v67, v72
v_madak_f32_e32 v81, v81, v79, 0x3f01e2bc
v_mad_f32 v80, v80, v79, 1.0
v_mac_f32_e32 v67, 0x3daaaaaa, v68
v_mac_f32_e32 v80, v78, v81
v_mov_b32_e32 v81, 0xb2951928
v_mac_f32_e32 v8, v70, v67
; v67 = v37 * (numerator / denominator) + v70 * v51^3 ; v35 = s18 * v35 * v51
v_rcp_f32_e32 v67, v80
v_madak_f32_e32 v81, v81, v79, 0xb85ffb93
v_mov_b32_e32 v82, 0x35c55945
v_madak_f32_e32 v82, v82, v79, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v79, 0xbc9ded90
v_madak_f32_e32 v82, v82, v79, 0x3d8eaf3b
v_madak_f32_e32 v79, v81, v79, 0xbf409397
v_mul_f32_e32 v35, s18, v35
v_mac_f32_e32 v79, v78, v82
v_mul_f32_e32 v67, v37, v67
v_mul_f32_e32 v67, v79, v67
v_mul_f32_e32 v68, v70, v75
v_mul_f32_e32 v35, v51, v35
v_mac_f32_e32 v67, v51, v68
; --- Piecewise evaluation of a function of x = v35 ---------------------------
; NOTE(review): thresholds (0.84375, 1.25, ~2.857, 6.0, 2^-28) and the final
; selects match the fdlibm-style single-precision erf(x) -- confirm.
; v68 = x & s27 (NOTE(review): presumably |x|, i.e. s27 = 0x7fffffff).
v_and_b32_e32 v68, s27, v35
; s[4:5] = (1.25 > v68)
v_mov_b32_e32 v72, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v72, v68
; Polynomial argument v72: 1/v68^2 by default, v68 - 1 when s[4:5] is set,
; v68^2 when s[8:9] = (0.84375 > v68) is set.
v_mul_f32_e32 v72, v68, v68
v_rcp_f32_e32 v76, v72
v_add_f32_e32 v78, -1.0, v68
v_mov_b32_e32 v79, 0xc11d077e
v_mov_b32_e32 v80, 0xbd777f97
v_cndmask_b32_e64 v76, v76, v78, s[4:5]
v_mov_b32_e32 v78, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v78, v68
v_cndmask_b32_e64 v72, v76, v72, s[8:9]
; Four polynomials in v72 are evaluated in parallel in v76, v79 (numerator
; candidates) and v78, v80 (denominator candidates); vcc = (0x4036db6e ~=
; 2.857 > v68) picks between the members of each pair.
v_mov_b32_e32 v76, 0xc3f1c275
v_madak_f32_e32 v79, v79, v72, 0xc2a2932b
v_madak_f32_e32 v76, v76, v72, 0xc480230b
v_mov_b32_e32 v78, 0xc1b38712
v_madak_f32_e32 v79, v72, v79, 0xc3389ae7
v_madak_f32_e32 v80, v80, v72, 0x40d23f7c
v_madak_f32_e32 v76, v72, v76, 0xc41f6441
v_madak_f32_e32 v78, v78, v72, 0x43ed43a7
v_madak_f32_e32 v79, v72, v79, 0xc322658c
v_madak_f32_e32 v80, v72, v80, 0x42d9451f
v_madak_f32_e32 v76, v72, v76, 0xc320a2ea
v_madak_f32_e32 v78, v72, v78, 0x451f90ce
v_madak_f32_e32 v79, v72, v79, 0xc2798057
v_madak_f32_e32 v80, v72, v80, 0x43d6810b
v_madak_f32_e32 v76, v72, v76, 0xc18e104b
v_madak_f32_e32 v78, v72, v78, 0x4547fdbb
v_madak_f32_e32 v79, v72, v79, 0xc128f022
v_madak_f32_e32 v80, v72, v80, 0x442158c9
v_madak_f32_e32 v76, v72, v76, 0xbf4c9dd4
v_madak_f32_e32 v78, v72, v78, 0x44c01759
v_madak_f32_e32 v79, v72, v79, 0xbf31a0b7
v_madak_f32_e32 v80, v72, v80, 0x43d9486f
v_mov_b32_e32 v81, 0x4036db6e
v_madak_f32_e32 v78, v72, v78, 0x43a2e571
v_madak_f32_e32 v80, v72, v80, 0x4309a863
v_madak_f32_e32 v76, v72, v76, 0xbc21a092
v_madak_f32_e32 v79, v72, v79, 0xbc21a093
v_cmp_gt_f32_e32 vcc, v81, v68
; v76 = selected numerator
v_cndmask_b32_e32 v76, v76, v79, vcc
v_mov_b32_e32 v79, 0xbb0df9c0
v_madak_f32_e32 v78, v72, v78, 0x41f2b459
v_madak_f32_e32 v80, v72, v80, 0x419d35ce
v_madak_f32_e32 v79, v79, v72, 0x3d1151b3
; v78 = selected denominator (without its leading 1.0)
v_cndmask_b32_e32 v78, v78, v80, vcc
; Numerator (v79) and denominator (v80) for the s[4:5] (v68 < 1.25) range
v_mov_b32_e32 v80, 0x3c445aa3
v_madak_f32_e32 v79, v72, v79, 0xbde31cc2
v_madak_f32_e32 v80, v80, v72, 0x3c5f6e13
v_madak_f32_e32 v79, v72, v79, 0x3ea2fe54
v_madak_f32_e32 v80, v72, v80, 0x3e013307
v_madak_f32_e32 v79, v72, v79, 0xbebe9208
v_madak_f32_e32 v80, v72, v80, 0x3d931ae7
v_madak_f32_e32 v79, v72, v79, 0x3ed46805
v_madak_f32_e32 v80, v72, v80, 0x3f0a5785
v_madak_f32_e32 v79, v72, v79, 0xbb1acdc6
v_madak_f32_e32 v80, v72, v80, 0x3dd9f331
v_cndmask_b32_e64 v76, v76, v79, s[4:5]
; Numerator (v79) and denominator (v80) for the s[8:9] (v68 < 0.84375) range
v_mov_b32_e32 v79, 0xb7c756b1
v_cndmask_b32_e64 v78, v78, v80, s[4:5]
v_mov_b32_e32 v80, 0xb684e21a
v_madak_f32_e32 v79, v79, v72, 0xbbbd1489
v_madak_f32_e32 v80, v80, v72, 0x390aee49
v_madak_f32_e32 v79, v72, v79, 0xbce9528f
v_madak_f32_e32 v80, v72, v80, 0x3ba68116
v_madak_f32_e32 v79, v72, v79, 0xbea66beb
v_madak_f32_e32 v80, v72, v80, 0x3d852a63
v_madak_f32_e32 v79, v72, v79, 0x3e0375d4
v_madak_f32_e32 v80, v72, v80, 0x3ecbbbce
v_cndmask_b32_e64 v78, v78, v80, s[8:9]
v_cndmask_b32_e64 v76, v76, v79, s[8:9]
; --- First exponential: argument v80 = -(v79^2) - 0.5625, v79 = x & s50 ------
; NOTE(review): s50 presumably clears low mantissa bits of x -- confirm.
v_and_b32_e32 v79, s50, v35
v_mov_b32_e32 v80, 0xbf100000
v_mad_f32 v80, v79, -v79, v80
; v81 = (int)(v80 * 0x3fb8aa3b(~1.4427) + (v80 < 0 ? -0.5 : 0.5))
v_cmp_gt_f32_e64 s[10:11], 0, v80
v_cndmask_b32_e64 v81, 0.5, -0.5, s[10:11]
v_mov_b32_e32 v82, 0x3fb8aa3b
v_mac_f32_e32 v81, v82, v80
v_cvt_i32_f32_e32 v81, v81
; Constants for range reduction (v86 ~= -0.69314, v88) and for the
; polynomial in the reduced argument (v91, v92, v94, v95, v96)
v_mov_b32_e32 v86, 0xbf317180
v_mov_b32_e32 v88, 0xb717f7d1
v_mov_b32_e32 v91, 0xb5ddea0e
v_cvt_f32_i32_e32 v85, v81
v_mov_b32_e32 v92, 0x3331bb4c
v_mov_b32_e32 v94, 0x388ab355
v_mov_b32_e32 v95, 0xbb360b61
; v87 = v80 + n*v86 ; v89 = v87 + n*v88 ; v90 = v89^2
v_mad_f32 v87, v86, v85, v80
v_mad_f32 v89, v88, v85, v87
v_mul_f32_e32 v90, v89, v89
v_mad_f32 v93, v92, v90, v91
v_mad_f32 v93, v93, v90, v94
v_mad_f32 v93, v93, v90, v95
v_mov_b32_e32 v96, 0x3e2aaaab
v_mad_f32 v93, v93, v90, v96
; c = v89 - v90*poly
v_mad_f32 v90, -v90, v93, v89
; v72 = 1.0 + v72 * (selected denominator polynomial)
v_mad_f32 v72, v72, v78, 1.0
; Scaled reciprocals: when |divisor| > v78 = 2^96 the divisor is
; pre-multiplied by v83 = 2^-32 before v_rcp and the quotient is multiplied
; by the same factor afterwards (factors kept in v84 and v97).
v_mov_b32_e32 v78, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v72|, v78
v_mov_b32_e32 v83, 0x2f800000
v_sub_f32_e32 v93, 2.0, v90
v_cndmask_b32_e32 v84, 1.0, v83, vcc
v_cmp_gt_f32_e64 vcc, |v93|, v78
v_cndmask_b32_e32 v97, 1.0, v83, vcc
v_mul_f32_e32 v93, v97, v93
v_rcp_f32_e32 v93, v93
v_mul_f32_e32 v72, v84, v72
v_mul_f32_e32 v89, v90, v89
v_rcp_f32_e32 v72, v72
v_mul_f32_e32 v89, v93, v89
v_mul_f32_e32 v89, v89, v97
v_mad_f32 v85, -v85, v88, -v89
v_subrev_f32_e32 v85, v87, v85
; v72 = numerator * rcp(scaled denominator) ; v76 = finished quotient
v_mul_f32_e32 v72, v72, v76
v_mul_f32_e32 v76, v72, v84
; --- Second exponential: argument v79 = (v79 + v68)*(v79 - v68) + v76 --------
v_subrev_f32_e32 v87, v68, v79
v_add_f32_e32 v79, v68, v79
v_sub_f32_e32 v85, 1.0, v85
; First exponential: integer-add (n << 23) onto the float bit pattern
v_lshlrev_b32_e32 v81, 23, v81
v_add_i32_e32 v81, vcc, v85, v81
v_mad_f32 v79, v79, v87, v76
v_cmp_gt_f32_e32 vcc, 0, v79
v_cndmask_b32_e64 v87, 0.5, -0.5, vcc
v_mac_f32_e32 v87, v82, v79
v_cvt_i32_f32_e32 v82, v87
; Range guards for both exponentials: 0 when the argument is below
; 0xc2aeac4f (~-87.34), +inf (0x7f800000) when it is >= 0x42b17218 (~88.72),
; and the argument itself when it is NaN (v_cmp_u).
v_mov_b32_e32 v85, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v80, v85
v_mov_b32_e32 v87, 0x42b17218
v_cvt_f32_i32_e32 v89, v82
v_cndmask_b32_e32 v81, 0, v81, vcc
v_cmp_lt_f32_e32 vcc, v80, v87
v_mov_b32_e32 v90, 0x7f800000
v_mad_f32 v86, v86, v89, v79
v_mad_f32 v93, v88, v89, v86
v_mul_f32_e32 v97, v93, v93
; Same polynomial as above, accumulated in place into the coefficient
; registers (v91, v94, v95, v96 are destroyed here).
v_mac_f32_e32 v91, v92, v97
v_mac_f32_e32 v94, v91, v97
v_mac_f32_e32 v95, v94, v97
v_mac_f32_e32 v96, v95, v97
v_mad_f32 v91, -v97, v96, v93
v_sub_f32_e32 v92, 2.0, v91
v_cndmask_b32_e32 v81, v90, v81, vcc
v_cmp_gt_f32_e64 vcc, |v92|, v78
v_cndmask_b32_e32 v94, 1.0, v83, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_cmp_u_f32_e32 vcc, v80, v80
; v80 = value of the first exponential
v_cndmask_b32_e32 v80, v81, v80, vcc
v_mul_f32_e32 v81, v91, v93
v_mul_f32_e32 v81, v92, v81
v_mul_f32_e32 v81, v81, v94
v_mad_f32 v81, -v89, v88, -v81
v_subrev_f32_e32 v81, v86, v81
v_sub_f32_e32 v81, 1.0, v81
v_lshlrev_b32_e32 v82, 23, v82
v_add_i32_e32 v81, vcc, v81, v82
v_cmp_ge_f32_e32 vcc, v79, v85
v_cndmask_b32_e32 v81, 0, v81, vcc
v_cmp_lt_f32_e64 s[10:11], v79, v87
v_cndmask_b32_e64 v81, v90, v81, s[10:11]
v_cmp_u_f32_e32 vcc, v79, v79
; v79 = value of the second exponential
v_cndmask_b32_e32 v79, v81, v79, vcc
; --- Final range selection ---------------------------------------------------
; v68 = 1.0 - (v79 * v80) / v68 via a scaled reciprocal, or 1.0 when
; v68 >= 6.0 ; s[10:11] = (2^-28 > v68)
v_cmp_gt_f32_e64 vcc, |v68|, v78
v_cndmask_b32_e32 v78, 1.0, v83, vcc
v_mul_f32_e32 v81, v78, v68
v_rcp_f32_e32 v81, v81
v_mul_f32_e32 v79, v79, v80
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e32 vcc, v80, v68
v_mov_b32_e32 v80, 0x31800000
v_mul_f32_e32 v79, v81, v79
v_cmp_gt_f32_e64 s[10:11], v80, v68
v_mad_f32 v68, -v78, v79, 1.0
v_cndmask_b32_e32 v68, 1.0, v68, vcc
; s[4:5] lanes: quotient + 0x3f58560b (~0.8451)
v_madak_f32_e32 v72, v84, v72, 0x3f58560b
v_cndmask_b32_e64 v68, v68, v72, s[4:5]
; OR in bits of x selected by s51
; NOTE(review): presumably the sign bit (s51 = 0x80000000) -- confirm.
v_and_b32_e32 v72, s51, v35
v_or_b32_e32 v68, v72, v68
; s[8:9] lanes: x + x * quotient
v_mad_f32 v72, v76, v35, v35
v_cndmask_b32_e64 v68, v68, v72, s[8:9]
; s[10:11] lanes: 0.125 * (8.0*x + 0x3f8375d4(~1.0270) * x)
v_mul_f32_e32 v72, 0x3f8375d4, v35
v_mac_f32_e32 v72, 0x41000000, v35
v_mul_f32_e32 v72, 0x3e000000, v72
v_cndmask_b32_e64 v68, v68, v72, s[10:11]
; NaN input is passed through unchanged
v_cmp_u_f32_e32 vcc, v35, v35
v_cndmask_b32_e32 v35, v68, v35, vcc
; --- Combine and accumulate --------------------------------------------------
; v35 = v51 * (v70 - f(x)) - s19 * v70 ; v5 += v35 * v45
v_subrev_f32_e32 v35, v35, v70
v_mul_f32_e32 v68, s19, v70
v_mad_f32 v35, v51, v35, -v68
v_mul_f32_e32 v51, v75, v73
v_mac_f32_e32 v5, v35, v45
; v35 = v45*v67 - v77*(v75*v73) ; v45 = -v35 (computed independently)
v_mul_f32_e32 v35, v77, v51
v_mad_f32 v35, v45, v67, -v35
v_mul_f32_e32 v45, v67, v45
v_mad_f32 v45, v51, v77, -v45
; Scale the difference vector (v23, v27, v31): the v45 products go into
; v69/v71/v74, the v35 products into v13/v14/v15.
v_mad_f32 v74, v31, v45, v74
v_mad_f32 v71, v27, v45, v71
v_mac_f32_e32 v69, v23, v45
v_mad_f32 v15, v31, v35, v15
v_mad_f32 v14, v27, v35, v14
v_mac_f32_e32 v13, v23, v35
BB6_46: ; %Flow1246
; in Loop: Header=BB6_11 Depth=1
; Join point for the squared-distance test in BB6_44: OR the lanes saved in
; s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB6_47: ; in Loop: Header=BB6_11 Depth=1
; Closes the bit-7 region opened in BB6_43 (mask in s[12:13]), stores the
; three accumulators v69/v71/v74 to LDS, and then restricts execution to
; lanes with v2 < 3 for the summation that follows.
s_or_b64 exec, exec, s[12:13]
s_mov_b32 m0, -1
; vcc = (3 > v2), signed compare
v_cmp_gt_i32_e32 vcc, 3, v2
; One dword each to the LDS byte addresses held in v6, v7 and v12
ds_write_b32 v6, v69
ds_write_b32 v7, v71
ds_write_b32 v12, v74
; s[4:5] ends up holding the lanes switched off here (restored at BB6_53)
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; make sure the LDS writes above have completed before they are read back
s_waitcnt lgkmcnt(0)
; mask branch BB6_53
s_cbranch_execz BB6_53
BB6_48: ; in Loop: Header=BB6_11 Depth=1
; Computes the LDS byte address v31 = s15 + 4 * (v11 + 64 * v2) and loads the
; first dword of the run that BB6_49 sums up.
; v27 = v2 * 64 (kept for reuse in BB6_49)
v_lshlrev_b32_e32 v27, 6, v2
v_add_i32_e32 v23, vcc, v11, v27
v_lshlrev_b32_e32 v23, 2, v23
v_add_i32_e32 v31, vcc, s15, v23
s_mov_b32 m0, -1
; v23 = first value; it becomes the running sum
ds_read_b32 v23, v31
; Guard for the remaining reads: (v11 | 1) < (v11 + 8), signed compare.
; NOTE(review): looks like the entry test of a fully unrolled 8-iteration
; loop starting at v11 -- confirm.
v_add_i32_e32 v35, vcc, 8, v11
v_or_b32_e32 v45, 1, v11
v_cmp_lt_i32_e32 vcc, v45, v35
; s[8:9] ends up holding the lanes switched off here (restored at BB6_50)
s_and_saveexec_b64 s[8:9], vcc
s_xor_b64 s[8:9], exec, s[8:9]
; wait for the ds_read of v23
s_waitcnt lgkmcnt(0)
; mask branch BB6_50
s_cbranch_execz BB6_50
BB6_49: ; in Loop: Header=BB6_11 Depth=1
; Adds seven more LDS dwords to the running sum in v23.
; Five are addressed relative to v31 (dword offsets 1, 2, 3, 4 and byte
; offset 28); two are addressed via a second base
; v27 = s15 + 4 * ((v11 | 3) + 64 * v2) at dword offsets 2 and 3.
; NOTE(review): if the low two bits of v11 are zero these are the dwords at
; offsets 1..7 after v31, i.e. eight consecutive values in total -- confirm.
s_mov_b32 m0, -1
ds_read2_b32 v[67:68], v31 offset0:1 offset1:2
v_or_b32_e32 v35, 3, v11
; v27 on entry is v2 * 64 (set in BB6_48)
v_add_i32_e32 v27, vcc, v35, v27
v_lshlrev_b32_e32 v27, 2, v27
ds_read2_b32 v[69:70], v31 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v67
v_add_i32_e32 v27, vcc, s15, v27
v_add_f32_e32 v23, v68, v23
; v67/v68 are reloaded; v31 is overwritten with the value at byte offset 28
ds_read2_b32 v[67:68], v27 offset0:2 offset1:3
ds_read_b32 v31, v31 offset:28
v_add_f32_e32 v23, v69, v23
v_add_f32_e32 v23, v70, v23
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v67, v23
v_add_f32_e32 v23, v68, v23
v_add_f32_e32 v23, v31, v23
BB6_50: ; %._crit_edge.i118
; in Loop: Header=BB6_11 Depth=1
; Prepares the atomic update performed by BB6_51: computes the element index
; v66*3 + v2, turns it into a byte offset and an absolute 64-bit address, and
; loads the current 32-bit value at that location.
; Re-enable the lanes that skipped BB6_49.
s_or_b64 exec, exec, s[8:9]
v_mul_lo_i32 v27, v66, 3
v_mov_b32_e32 v31, s29
; s[30:31] = s[46:47] completes the 4-dword buffer descriptor s[28:31]
s_mov_b64 s[30:31], s[46:47]
; s[8:9] = 0: empty "finished lanes" mask for the loop in BB6_51
s_mov_b64 s[8:9], 0
v_add_i32_e32 v66, vcc, v27, v2
; sign-extend the index to 64 bits, then multiply by 4 (bytes per dword)
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 2
; v[66:67] = s[28:29] + byte offset (64-bit add with carry)
v_add_i32_e32 v66, vcc, s28, v68
v_addc_u32_e32 v67, vcc, v69, v31, vcc
; v69 = current value; the load uses the offset form v[68:69] with s[28:31]
buffer_load_dword v69, v[68:69], s[28:31], 0 addr64
s_waitcnt vmcnt(0)
BB6_51: ; Parent Loop BB6_11 Depth=1
; => This Inner Loop Header: Depth=2
; Compare-and-swap retry loop that adds the float in v23 to the 32-bit value
; at address v[66:67]. v69 holds the value the lane last observed there.
; v68 = observed value + v23 (the value we try to store)
v_add_f32_e32 v68, v23, v69
; v[70:71] = {new value, expected old value} operand pair for the cmpswap
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v70, v68
; glc: the value found in memory before the operation is returned in v70
buffer_atomic_cmpswap v[70:71], v[66:67], s[44:47], 0 addr64 glc
; NOTE(review): v27 is written twice back to back and is not read anywhere
; in this loop, so the first write (-1) is dead. It looks like a compiler
; leftover; left untouched because this listing records compiler output.
v_mov_b32_e32 v27, -1
v_mov_b32_e32 v27, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; The swap succeeded in lanes where the returned value equals the expected one
v_cmp_eq_u32_e32 vcc, v70, v69
; accumulate finished lanes in s[8:9]
s_or_b64 s[8:9], vcc, s[8:9]
; retry with the freshly observed value
v_mov_b32_e32 v69, v70
; drop finished lanes from exec; loop while any lane is still active
s_andn2_b64 exec, exec, s[8:9]
s_cbranch_execnz BB6_51
; BB#52: ; %Flow1244
; in Loop: Header=BB6_11 Depth=1
; Exit of the BB6_51 retry loop: exec is empty here, so OR the finished-lane
; mask accumulated in s[8:9] back into exec.
s_or_b64 exec, exec, s[8:9]
BB6_53: ; %Flow1245
; in Loop: Header=BB6_11 Depth=1
; Join point for the "v2 < 3" region opened in BB6_47: OR the lanes saved in
; s[4:5] back into exec.
s_or_b64 exec, exec, s[4:5]
BB6_54: ; %Flow1254
; in Loop: Header=BB6_11 Depth=1
; Closes an outer exec-masked region (mask in s[54:55], saved above this
; chunk) and opens the next one, which keeps only lanes where any of bits
; 8..15 of v65 is set.
s_or_b64 exec, exec, s[54:55]
v_and_b32_e32 v23, 0xff00, v65
v_cmp_ne_u32_e32 vcc, 0, v23
; s[4:5] = old exec, exec &= vcc ; s[30:31] = lanes just switched off
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_94
; Skip the bits-8..15 group entirely when no lane has any of them set.
s_cbranch_execz BB6_94
BB6_55: ; in Loop: Header=BB6_11 Depth=1
; Start of the group guarded by bits 8..15 of v65: fetches an index from LDS,
; loads one 16-byte and one 8-byte record for element (index*8 + v1) from two
; global buffers, zeroes three accumulators, and tests bit 8 of v65.
; NOTE(review): presumably the index is a cluster number and the records are
; position+charge (v72..v75) and a parameter pair (v67, v68) -- confirm.
s_mov_b32 m0, -1
; v23 = dword at LDS address v54 + 4
ds_read_b32 v23, v54 offset:4
; buffer descriptor s[8:11] = {s[32:33], s[46:47]}
s_mov_b64 s[8:9], s[32:33]
s_mov_b64 s[10:11], s[46:47]
s_waitcnt lgkmcnt(0)
; v66 = v23 * 8 + v1, sign-extended to 64 bits in v[66:67]
v_lshlrev_b32_e32 v27, 3, v23
v_add_i32_e32 v66, vcc, v27, v1
v_ashrrev_i32_e32 v67, 31, v66
; byte offset = index * 16 ; load four dwords into v72..v75
v_lshl_b64 v[68:69], v[66:67], 4
buffer_load_dwordx4 v[72:75], v[68:69], s[8:11], 0 addr64
; byte offset = index * 8 ; same descriptor with the base swapped to s[36:37]
v_lshl_b64 v[70:71], v[66:67], 3
s_mov_b64 s[8:9], s[36:37]
; load two dwords into v67, v68
buffer_load_dwordx2 v[67:68], v[70:71], s[8:11], 0 addr64
; v27 = (v65 >> 8) & 1 ; v69 = v71 = v78 = 0 (accumulators for this group)
v_lshrrev_b32_e32 v27, 8, v65
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v78, v69
; s[4:5] = old exec, exec &= vcc ; s[34:35] = lanes just switched off
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; both global loads must have landed before any path uses v67/v68/v72..v75
s_waitcnt vmcnt(0)
; mask branch BB6_59
s_cbranch_execz BB6_59
BB6_56: ; in Loop: Header=BB6_11 Depth=1
; Bit-8 section: reads four dwords from LDS, forms the difference vector
; against v72/v73/v74 (loaded in BB6_55) and keeps only lanes whose squared
; length is below a per-lane limit. Survivors fall through to BB6_57.
; m0 is set to -1 ahead of the LDS (ds_*) access.
s_mov_b32 m0, -1
; v[79:82] = two consecutive 64-bit words at v55 + 0 and v55 + 8 bytes
ds_read2_b64 v[79:82], v55 offset1:1
; Build lane mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v41 != v23)
; NOTE(review): looks like an exclusion / self-pair filter -- confirm.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; wait for the LDS read before touching v79..v82
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v27 = v80 - v73
v_subrev_f32_e32 v27, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v31 = v79 - v72
v_subrev_f32_e32 v31, v72, v79
; v45 accumulates v27^2 + v31^2 + v35^2 (squared length of the difference)
v_mul_f32_e32 v45, v27, v27
; v51 = 1.0 in lanes selected by the mask, 0 elsewhere
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v74
v_subrev_f32_e32 v35, v74, v81
v_mac_f32_e32 v45, v31, v31
; v69 = v71 = v78 = 0 again, so lanes that skip BB6_57 carry zeros
v_mov_b32_e32 v69, 0
v_mac_f32_e32 v45, v35, v35
; limit = s22 in selected lanes, 0 otherwise
; NOTE(review): s22 is presumably the squared cut-off radius -- confirm.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v78, v69
; s[4:5] = old exec, exec &= vcc ; s[38:39] = lanes switched off here
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_58
s_cbranch_execz BB6_58
BB6_57: ; in Loop: Header=BB6_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v69, v9, v45
v_mul_f32_e32 v70, v69, v69
v_mov_b32_e32 v71, 0x3a92b707
v_madak_f32_e32 v71, v71, v70, 0x3ded3cb2
v_mov_b32_e32 v76, 0x3c739487
v_madak_f32_e32 v76, v76, v70, 0x3f01e2bc
v_mad_f32 v71, v71, v70, 1.0
v_mac_f32_e32 v71, v69, v76
v_mov_b32_e32 v76, 0xb2951928
v_madak_f32_e32 v76, v76, v70, 0xb85ffb93
v_mov_b32_e32 v77, 0x35c55945
v_madak_f32_e32 v77, v77, v70, 0x3a83ca0c
v_madak_f32_e32 v76, v76, v70, 0xbc9ded90
v_madak_f32_e32 v77, v77, v70, 0x3d8eaf3b
v_madak_f32_e32 v76, v76, v70, 0xbf409397
v_mac_f32_e32 v76, v69, v77
v_rsq_f32_e32 v77, v45
v_lshrrev_b32_e32 v69, 8, v19
v_and_b32_e32 v69, 1, v69
v_cmp_eq_u32_e32 vcc, 1, v69
v_mul_f32_e32 v79, v77, v77
s_mov_b32 m0, -1
v_cndmask_b32_e64 v78, 0, 1.0, vcc
v_mul_f32_e32 v69, v79, v79
v_mul_f32_e32 v80, v78, v69
ds_read_b64 v[69:70], v56
v_mul_f32_e32 v81, v79, v80
v_mad_f32 v80, v80, v79, s23
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v83, v81, v81, s26
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v70, v68, v70
v_mul_f32_e32 v82, v81, v70
v_mad_f32 v82, v69, v67, -v82
v_mul_f32_e32 v69, v67, v69
v_mul_f32_e32 v80, 0xbe2aaaab, v80
v_mul_f32_e32 v70, v83, v70
v_mul_f32_e32 v69, v69, v80
v_mac_f32_e32 v69, 0x3daaaaaa, v70
v_mac_f32_e32 v8, v78, v69
v_rcp_f32_e32 v69, v71
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v70, v78, v79
v_mul_f32_e32 v45, v77, v45
v_mul_f32_e32 v69, v37, v69
v_mul_f32_e32 v69, v76, v69
v_mac_f32_e32 v69, v77, v70
v_and_b32_e32 v70, s27, v45
v_mov_b32_e32 v71, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v71, v70
v_mul_f32_e32 v71, v70, v70
v_rcp_f32_e32 v76, v71
v_add_f32_e32 v80, -1.0, v70
v_mov_b32_e32 v83, 0xbd777f97
v_mov_b32_e32 v84, 0xbf100000
v_cndmask_b32_e64 v76, v76, v80, s[4:5]
v_mov_b32_e32 v80, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v80, v70
v_cndmask_b32_e64 v71, v76, v71, s[8:9]
v_mov_b32_e32 v80, 0xc11d077e
v_mov_b32_e32 v76, 0x4036db6e
v_madak_f32_e32 v80, v80, v71, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v76, v70
v_mov_b32_e32 v76, 0xc3f1c275
v_madak_f32_e32 v76, v76, v71, 0xc480230b
v_madak_f32_e32 v80, v71, v80, 0xc3389ae7
v_madak_f32_e32 v76, v71, v76, 0xc41f6441
v_madak_f32_e32 v80, v71, v80, 0xc322658c
v_madak_f32_e32 v76, v71, v76, 0xc320a2ea
v_madak_f32_e32 v80, v71, v80, 0xc2798057
v_madak_f32_e32 v76, v71, v76, 0xc18e104b
v_madak_f32_e32 v80, v71, v80, 0xc128f022
v_madak_f32_e32 v76, v71, v76, 0xbf4c9dd4
v_madak_f32_e32 v80, v71, v80, 0xbf31a0b7
v_madak_f32_e32 v76, v71, v76, 0xbc21a092
v_madak_f32_e32 v80, v71, v80, 0xbc21a093
v_madak_f32_e32 v83, v83, v71, 0x40d23f7c
v_cndmask_b32_e32 v76, v76, v80, vcc
v_mov_b32_e32 v80, 0xc1b38712
v_madak_f32_e32 v80, v80, v71, 0x43ed43a7
v_madak_f32_e32 v83, v71, v83, 0x42d9451f
v_madak_f32_e32 v80, v71, v80, 0x451f90ce
v_madak_f32_e32 v83, v71, v83, 0x43d6810b
v_madak_f32_e32 v80, v71, v80, 0x4547fdbb
v_madak_f32_e32 v83, v71, v83, 0x442158c9
v_madak_f32_e32 v80, v71, v80, 0x44c01759
v_madak_f32_e32 v83, v71, v83, 0x43d9486f
v_madak_f32_e32 v80, v71, v80, 0x43a2e571
v_madak_f32_e32 v83, v71, v83, 0x4309a863
v_madak_f32_e32 v80, v71, v80, 0x41f2b459
v_madak_f32_e32 v83, v71, v83, 0x419d35ce
v_cndmask_b32_e32 v80, v80, v83, vcc
v_mov_b32_e32 v83, 0xbb0df9c0
v_madak_f32_e32 v83, v83, v71, 0x3d1151b3
v_madak_f32_e32 v83, v71, v83, 0xbde31cc2
v_madak_f32_e32 v83, v71, v83, 0x3ea2fe54
v_madak_f32_e32 v83, v71, v83, 0xbebe9208
v_madak_f32_e32 v83, v71, v83, 0x3ed46805
v_madak_f32_e32 v83, v71, v83, 0xbb1acdc6
v_cndmask_b32_e64 v76, v76, v83, s[4:5]
v_mov_b32_e32 v83, 0x3c445aa3
v_madak_f32_e32 v83, v83, v71, 0x3c5f6e13
v_madak_f32_e32 v83, v71, v83, 0x3e013307
v_madak_f32_e32 v83, v71, v83, 0x3d931ae7
v_madak_f32_e32 v83, v71, v83, 0x3f0a5785
v_madak_f32_e32 v83, v71, v83, 0x3dd9f331
v_cndmask_b32_e64 v80, v80, v83, s[4:5]
v_mov_b32_e32 v83, 0xb684e21a
v_madak_f32_e32 v83, v83, v71, 0x390aee49
v_madak_f32_e32 v83, v71, v83, 0x3ba68116
v_madak_f32_e32 v83, v71, v83, 0x3d852a63
v_madak_f32_e32 v83, v71, v83, 0x3ecbbbce
v_cndmask_b32_e64 v80, v80, v83, s[8:9]
v_mov_b32_e32 v83, 0xb7c756b1
v_madak_f32_e32 v83, v83, v71, 0xbbbd1489
v_madak_f32_e32 v83, v71, v83, 0xbce9528f
v_madak_f32_e32 v83, v71, v83, 0xbea66beb
v_madak_f32_e32 v83, v71, v83, 0x3e0375d4
v_mad_f32 v71, v71, v80, 1.0
v_and_b32_e32 v80, s50, v45
v_mad_f32 v84, v80, -v80, v84
v_cmp_gt_f32_e32 vcc, 0, v84
v_cndmask_b32_e64 v85, 0.5, -0.5, vcc
v_mov_b32_e32 v86, 0x3fb8aa3b
v_mac_f32_e32 v85, v86, v84
v_cvt_i32_f32_e32 v85, v85
v_mov_b32_e32 v91, 0xbf317180
v_mov_b32_e32 v93, 0xb717f7d1
v_mov_b32_e32 v96, 0xb5ddea0e
v_cvt_f32_i32_e32 v90, v85
v_mov_b32_e32 v97, 0x3331bb4c
v_mov_b32_e32 v99, 0x388ab355
v_mov_b32_e32 v100, 0xbb360b61
v_mad_f32 v92, v91, v90, v84
v_mad_f32 v94, v93, v90, v92
v_mul_f32_e32 v95, v94, v94
v_mad_f32 v98, v97, v95, v96
v_mad_f32 v98, v98, v95, v99
v_mad_f32 v98, v98, v95, v100
v_mov_b32_e32 v101, 0x3e2aaaab
v_mad_f32 v98, v98, v95, v101
v_mad_f32 v95, -v95, v98, v94
v_mov_b32_e32 v87, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v71|, v87
v_mov_b32_e32 v88, 0x2f800000
v_sub_f32_e32 v98, 2.0, v95
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e64 vcc, |v98|, v87
v_mul_f32_e32 v71, v89, v71
v_cndmask_b32_e32 v102, 1.0, v88, vcc
v_mul_f32_e32 v98, v102, v98
v_rcp_f32_e32 v71, v71
v_rcp_f32_e32 v98, v98
v_cndmask_b32_e64 v76, v76, v83, s[8:9]
v_lshlrev_b32_e32 v83, 23, v85
v_mul_f32_e32 v71, v71, v76
v_mul_f32_e32 v76, v95, v94
v_mul_f32_e32 v76, v98, v76
v_mul_f32_e32 v76, v76, v102
v_mad_f32 v76, -v90, v93, -v76
v_subrev_f32_e32 v76, v92, v76
v_sub_f32_e32 v76, 1.0, v76
v_add_i32_e32 v76, vcc, v76, v83
v_subrev_f32_e32 v83, v70, v80
v_mul_f32_e32 v85, v71, v89
v_add_f32_e32 v80, v70, v80
v_mad_f32 v80, v80, v83, v85
v_cmp_gt_f32_e32 vcc, 0, v80
v_cndmask_b32_e64 v83, 0.5, -0.5, vcc
v_mac_f32_e32 v83, v86, v80
v_cvt_i32_f32_e32 v83, v83
v_madak_f32_e32 v71, v89, v71, 0x3f58560b
v_cvt_f32_i32_e32 v86, v83
v_lshlrev_b32_e32 v83, 23, v83
v_mad_f32 v90, v91, v86, v80
v_mad_f32 v91, v93, v86, v90
v_mul_f32_e32 v92, v91, v91
v_mac_f32_e32 v96, v97, v92
v_mac_f32_e32 v99, v96, v92
v_mac_f32_e32 v100, v99, v92
v_mac_f32_e32 v101, v100, v92
v_mad_f32 v92, -v92, v101, v91
v_mul_f32_e32 v91, v92, v91
v_sub_f32_e32 v92, 2.0, v92
v_cmp_gt_f32_e64 vcc, |v92|, v87
v_cndmask_b32_e32 v94, 1.0, v88, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_mul_f32_e32 v91, v92, v91
v_mul_f32_e32 v91, v91, v94
v_mad_f32 v86, -v86, v93, -v91
v_mov_b32_e32 v91, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v84, v91
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v76, 0, v76, vcc
v_cmp_lt_f32_e32 vcc, v84, v92
v_mov_b32_e32 v93, 0x7f800000
v_cndmask_b32_e32 v76, v93, v76, vcc
v_cmp_u_f32_e32 vcc, v84, v84
v_cndmask_b32_e32 v76, v76, v84, vcc
v_subrev_f32_e32 v84, v90, v86
v_sub_f32_e32 v84, 1.0, v84
v_add_i32_e32 v83, vcc, v84, v83
v_cmp_ge_f32_e32 vcc, v80, v91
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e64 s[10:11], v80, v92
v_cndmask_b32_e64 v83, v93, v83, s[10:11]
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v83, v80, vcc
v_mul_f32_e32 v76, v80, v76
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v80, v70
v_mov_b32_e32 v80, 0x31800000
v_cmp_gt_f32_e64 vcc, |v70|, v87
v_cmp_gt_f32_e64 s[12:13], v80, v70
v_cndmask_b32_e32 v80, 1.0, v88, vcc
v_mul_f32_e32 v70, v80, v70
v_rcp_f32_e32 v70, v70
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v70, v70, v76
v_mad_f32 v70, -v80, v70, 1.0
v_cndmask_b32_e64 v70, 1.0, v70, s[10:11]
v_cndmask_b32_e64 v70, v70, v71, s[4:5]
v_and_b32_e32 v71, s51, v45
v_or_b32_e32 v70, v71, v70
v_mad_f32 v71, v85, v45, v45
v_cndmask_b32_e64 v70, v70, v71, s[8:9]
v_mul_f32_e32 v71, 0x3f8375d4, v45
v_mac_f32_e32 v71, 0x41000000, v45
v_mul_f32_e32 v71, 0x3e000000, v71
v_cndmask_b32_e64 v70, v70, v71, s[12:13]
v_cndmask_b32_e32 v45, v70, v45, vcc
v_subrev_f32_e32 v45, v45, v78
v_mul_f32_e32 v70, s19, v78
v_mad_f32 v45, v77, v45, -v70
v_mul_f32_e32 v70, v79, v81
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v82, v70
v_mad_f32 v45, v51, v69, -v45
v_mul_f32_e32 v51, v69, v51
v_mad_f32 v51, v70, v82, -v51
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
v_mul_f32_e64 v78, v45, -v35
v_mul_f32_e64 v71, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB6_58: ; %Flow1242
; in Loop: Header=BB6_11 Depth=1
; Control-flow join: OR the lane mask held in s[38:39] back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_59: ; in Loop: Header=BB6_11 Depth=1
; Join point (re-enable the lanes held in s[34:35]), then gate the next
; section on bit 9 of v65.
; NOTE(review): v65 looks like a per-lane interaction bit mask — confirm.
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 9) & 1
v_lshrrev_b32_e32 v27, 9, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec; exec = exec & vcc
s_and_saveexec_b64 s[4:5], vcc
; s[34:35] = lanes just switched off; OR-ed back into exec at BB6_63.
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_63
; Skip the whole section when no lane has the bit set.
s_cbranch_execz BB6_63
BB6_60: ; in Loop: Header=BB6_11 Depth=1
; Load four dwords from LDS into v[79:82], build the difference vector
; against v72/v73/v74 and compare its squared length with a masked threshold.
; NOTE(review): presumably v[79:81] is a position and v82 a per-atom scalar
; (it is multiplied by v75 in BB6_61) — confirm against the kernel source.
s_mov_b32 m0, -1
; Two 64-bit LDS reads addressed by v55 (offset slots 16 and 17).
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
; s[4:5] = ~(exec & s[0:1]) | (v64 != v23), later OR-ed with (exec & s[2:3]).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before touching v[79:82].
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v72
v_subrev_f32_e32 v27, v72, v79
; v45 = v31^2 + v27^2 + v35^2 (squared length of the difference vector)
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 on lanes selected by s[4:5], 0 elsewhere
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v74
v_subrev_f32_e32 v35, v74, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold = s22 on selected lanes, 0 on the others (those can never pass).
; NOTE(review): s22 is presumably the squared cut-off — confirm.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] keeps the lanes switched off
; here and is OR-ed back into exec at BB6_62.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_62
s_cbranch_execz BB6_62
BB6_61: ; in Loop: Header=BB6_11 Depth=1
; Executed by the lanes that passed the v45 < threshold test in BB6_60.
; Reads: v45 (squared length), v27/v31/v35 (difference vector), v82 (fourth
; dword loaded in BB6_60), bit 9 of v19 and two LDS dwords addressed by v56.
; Updates: v8 and v5 (scalar sums), v69/v71/v78 and v42/v43/v44 (vector sums).
; NOTE(review): presumably a non-bonded pair interaction (1/r^6-based term into
; v8, erf-screened 1/r term into v5, forces into the vector sums) with the
; erf()/exp() builtins inlined by the compiler — confirm against the kernel source.
;
; Clamp v45 from below (0x34cd15ae is about 3.8e-7) before v_rsq_f32 uses it.
v_max_f32_e32 v45, 0x34cd15ae, v45
; Rational polynomial in v80 = v9 * v45 (v76 = v80^2) built from Horner-style
; v_madak steps: the denominator accumulates in v82, the numerator in v83.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * v82 is taken here, before v82 is reused as the denominator.
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
; v70 becomes 1.0 if bit 9 of v19 is set, else 0 (finished by v_cndmask below).
v_lshrrev_b32_e32 v70, 9, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Two more LDS dwords into v[76:77]; consumed after the s_waitcnt below.
ds_read_b64 v[76:77], v56 offset:64
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v70 * v80^2, v84 = v70 * v80^3
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
; Combine v84 with the LDS pair (scaled by v67 and v68) and add the result,
; gated by v70, into v8. 0xbe2aaaab is -1/6 and 0x3daaaaaa is about 1/12.
; v85 = v76 * v67 - v84 * (v68 * v77) is kept for the vector update at the end.
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 (the rational polynomial) + v70 * v79^3,
; interleaved with v45 = s18 * v45 * v79 and the v8 update.
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; Piecewise evaluation on v76 = v45 & s27, split at 0.84375 (s[8:9]),
; 1.25 (s[4:5]) and 0x4036db6e ~ 2.857 (vcc). v81 becomes the per-interval
; polynomial argument: v76^2, v76 - 1, or 1 / v76^2.
; NOTE(review): the thresholds and 0x3f58560b match the fdlibm erff()
; algorithm, and s27 is presumably the 0x7fffffff abs mask — confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Horner chains for every interval are evaluated unconditionally; the
; v_cndmask instructions pick the numerator (v82) and denominator (v83)
; candidates that belong to the lane's interval.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator v83 = 1 + v81 * (selected polynomial).
; v87 = 0x6f800000 (2^96) and v88 = 0x2f800000 (2^-32) guard each v_rcp_f32
; division: a divisor larger than 2^96 is pre-scaled by 2^-32 and the quotient
; is multiplied by the same factor afterwards (v89 here, v99/v83 further down).
v_mad_f32 v83, v81, v83, 1.0
; First exponential: v91 = -(v90^2) - 0.5625 with v90 = v45 & s50. Reduced as
; n = round(v91 * log2(e)) (0x3fb8aa3b), remainder via the split ln2 constants
; 0xbf317180 / 0xb717f7d1, a polynomial in v96, then n is added into the
; exponent field (v_lshlrev by 23 + v_add_i32). The result lands in v83.
; The last numerator chain (v86 -> v81) and the quotient v81 are interleaved.
; NOTE(review): s50 presumably masks off low mantissa bits of v45 — confirm.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range fix-ups for the first exponential: 0 when v91 is below 0xc2aeac4f
; (about -87.3), +inf (0x7f800000) when it is not below 0x42b17218 (about
; 88.7), and an unordered (NaN) v91 is passed through unchanged.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential of v90 = (v90 - v76) * (v90 + v76) + v96, where
; v96 = v81 * v89 is the rescaled polynomial quotient. Same reduction as
; above; the v_mac chain overwrites the constant registers v97/v100/v101/v102.
; The result lands in v82. v81 = v89 * v81 + 0x3f58560b is also formed here.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; Final selection of the piecewise result into v45:
;   v76 <  6.0 (s[10:11])      : 1 - v82 / v76, otherwise 1.0
;   v76 <  1.25 (s[4:5])       : v89 * v81 + 0x3f58560b
;   then the bits (v45 & s51) are OR-ed in
;   v76 <  0.84375 (s[8:9])    : v45 + v96 * v45
;   v76 <  0x31800000 (2^-28)  : 0.125 * (8 * v45 + 0x3f8375d4 * v45)
;   v45 unordered (NaN)        : v45 unchanged
; NOTE(review): s51 is presumably the 0x80000000 sign mask — confirm.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; v5 += v51 * (v79 * (v70 - v45) - s19 * v70)
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v77 - v85 * (v80 * v84) and v51 = -v45: the scalar that scales
; the difference vector, with both signs.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; v78/v71/v69 += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; v44/v43/v42 += (v35, v31, v27) * v45
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB6_62: ; %Flow1241
; in Loop: Header=BB6_11 Depth=1
; Join after the threshold branch in BB6_60: re-enable the lanes parked in s[38:39].
s_or_b64 exec, exec, s[38:39]
BB6_63: ; in Loop: Header=BB6_11 Depth=1
; Join point (re-enable the lanes held in s[34:35]), then gate the next
; section on bit 10 of v65.
; NOTE(review): v65 looks like a per-lane interaction bit mask — confirm.
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 10) & 1
v_lshrrev_b32_e32 v27, 10, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec; exec = exec & vcc
s_and_saveexec_b64 s[4:5], vcc
; s[34:35] = lanes just switched off; OR-ed back into exec at BB6_67.
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_67
; Skip the whole section when no lane has the bit set.
s_cbranch_execz BB6_67
BB6_64: ; in Loop: Header=BB6_11 Depth=1
; Load four dwords from LDS into v[79:82], build the difference vector
; against v72/v73/v74 and compare its squared length with a masked threshold.
; NOTE(review): presumably v[79:81] is a position and v82 a per-atom scalar
; (it is multiplied by v75 in BB6_65) — confirm against the kernel source.
s_mov_b32 m0, -1
; Two 64-bit LDS reads addressed by v55 (offset slots 32 and 33).
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; s[4:5] = ~(exec & s[0:1]) | (v63 != v23), later OR-ed with (exec & s[2:3]).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before touching v[79:82].
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v72
v_subrev_f32_e32 v27, v72, v79
; v45 = v31^2 + v27^2 + v35^2 (squared length of the difference vector)
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 on lanes selected by s[4:5], 0 elsewhere
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v74
v_subrev_f32_e32 v35, v74, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold = s22 on selected lanes, 0 on the others (those can never pass).
; NOTE(review): s22 is presumably the squared cut-off — confirm.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] keeps the lanes switched off
; here and is OR-ed back into exec at BB6_66.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_66
s_cbranch_execz BB6_66
BB6_65: ; in Loop: Header=BB6_11 Depth=1
; Executed by the lanes that passed the v45 < threshold test in BB6_64.
; Reads: v45 (squared length), v27/v31/v35 (difference vector), v82 (fourth
; dword loaded in BB6_64), bit 10 of v19 and two LDS dwords addressed by v56.
; Updates: v8 and v5 (scalar sums), v69/v71/v78 and v32/v33/v34 (vector sums).
; NOTE(review): presumably a non-bonded pair interaction (1/r^6-based term into
; v8, erf-screened 1/r term into v5, forces into the vector sums) with the
; erf()/exp() builtins inlined by the compiler — confirm against the kernel source.
;
; Clamp v45 from below (0x34cd15ae is about 3.8e-7) before v_rsq_f32 uses it.
v_max_f32_e32 v45, 0x34cd15ae, v45
; Rational polynomial in v80 = v9 * v45 (v76 = v80^2) built from Horner-style
; v_madak steps: the denominator accumulates in v82, the numerator in v83.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * v82 is taken here, before v82 is reused as the denominator.
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
; v70 becomes 1.0 if bit 10 of v19 is set, else 0 (finished by v_cndmask below).
v_lshrrev_b32_e32 v70, 10, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Two more LDS dwords into v[76:77]; consumed after the s_waitcnt below.
ds_read_b64 v[76:77], v56 offset:128
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v70 * v80^2, v84 = v70 * v80^3
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
; Combine v84 with the LDS pair (scaled by v67 and v68) and add the result,
; gated by v70, into v8. 0xbe2aaaab is -1/6 and 0x3daaaaaa is about 1/12.
; v85 = v76 * v67 - v84 * (v68 * v77) is kept for the vector update at the end.
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 (the rational polynomial) + v70 * v79^3,
; interleaved with v45 = s18 * v45 * v79 and the v8 update.
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; Piecewise evaluation on v76 = v45 & s27, split at 0.84375 (s[8:9]),
; 1.25 (s[4:5]) and 0x4036db6e ~ 2.857 (vcc). v81 becomes the per-interval
; polynomial argument: v76^2, v76 - 1, or 1 / v76^2.
; NOTE(review): the thresholds and 0x3f58560b match the fdlibm erff()
; algorithm, and s27 is presumably the 0x7fffffff abs mask — confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Horner chains for every interval are evaluated unconditionally; the
; v_cndmask instructions pick the numerator (v82) and denominator (v83)
; candidates that belong to the lane's interval.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator v83 = 1 + v81 * (selected polynomial).
; v87 = 0x6f800000 (2^96) and v88 = 0x2f800000 (2^-32) guard each v_rcp_f32
; division: a divisor larger than 2^96 is pre-scaled by 2^-32 and the quotient
; is multiplied by the same factor afterwards (v89 here, v99/v83 further down).
v_mad_f32 v83, v81, v83, 1.0
; First exponential: v91 = -(v90^2) - 0.5625 with v90 = v45 & s50. Reduced as
; n = round(v91 * log2(e)) (0x3fb8aa3b), remainder via the split ln2 constants
; 0xbf317180 / 0xb717f7d1, a polynomial in v96, then n is added into the
; exponent field (v_lshlrev by 23 + v_add_i32). The result lands in v83.
; The last numerator chain (v86 -> v81) and the quotient v81 are interleaved.
; NOTE(review): s50 presumably masks off low mantissa bits of v45 — confirm.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range fix-ups for the first exponential: 0 when v91 is below 0xc2aeac4f
; (about -87.3), +inf (0x7f800000) when it is not below 0x42b17218 (about
; 88.7), and an unordered (NaN) v91 is passed through unchanged.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential of v90 = (v90 - v76) * (v90 + v76) + v96, where
; v96 = v81 * v89 is the rescaled polynomial quotient. Same reduction as
; above; the v_mac chain overwrites the constant registers v97/v100/v101/v102.
; The result lands in v82. v81 = v89 * v81 + 0x3f58560b is also formed here.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; Final selection of the piecewise result into v45:
;   v76 <  6.0 (s[10:11])      : 1 - v82 / v76, otherwise 1.0
;   v76 <  1.25 (s[4:5])       : v89 * v81 + 0x3f58560b
;   then the bits (v45 & s51) are OR-ed in
;   v76 <  0.84375 (s[8:9])    : v45 + v96 * v45
;   v76 <  0x31800000 (2^-28)  : 0.125 * (8 * v45 + 0x3f8375d4 * v45)
;   v45 unordered (NaN)        : v45 unchanged
; NOTE(review): s51 is presumably the 0x80000000 sign mask — confirm.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; v5 += v51 * (v79 * (v70 - v45) - s19 * v70)
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v77 - v85 * (v80 * v84) and v51 = -v45: the scalar that scales
; the difference vector, with both signs.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; v78/v71/v69 += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; v34/v33/v32 += (v35, v31, v27) * v45
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB6_66: ; %Flow1240
; in Loop: Header=BB6_11 Depth=1
; Join after the threshold branch in BB6_64: re-enable the lanes parked in s[38:39].
s_or_b64 exec, exec, s[38:39]
BB6_67: ; in Loop: Header=BB6_11 Depth=1
; Join point (re-enable the lanes held in s[34:35]), then gate the next
; section on bit 11 of v65.
; NOTE(review): v65 looks like a per-lane interaction bit mask — confirm.
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 11) & 1
v_lshrrev_b32_e32 v27, 11, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec; exec = exec & vcc
s_and_saveexec_b64 s[4:5], vcc
; s[34:35] = lanes just switched off; OR-ed back into exec at BB6_71.
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_71
; Skip the whole section when no lane has the bit set.
s_cbranch_execz BB6_71
BB6_68: ; in Loop: Header=BB6_11 Depth=1
; Load four dwords from LDS into v[79:82], build the difference vector
; against v72/v73/v74 and compare its squared length with a masked threshold.
; NOTE(review): presumably v[79:81] is a position and v82 a per-atom scalar
; (it is multiplied by v75 in BB6_69) — confirm against the kernel source.
s_mov_b32 m0, -1
; Two 64-bit LDS reads addressed by v55 (offset slots 48 and 49).
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; s[4:5] = ~(exec & s[0:1]) | (v62 != v23), later OR-ed with (exec & s[2:3]).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before touching v[79:82].
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v72
v_subrev_f32_e32 v27, v72, v79
; v45 = v31^2 + v27^2 + v35^2 (squared length of the difference vector)
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 on lanes selected by s[4:5], 0 elsewhere
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v74
v_subrev_f32_e32 v35, v74, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold = s22 on selected lanes, 0 on the others (those can never pass).
; NOTE(review): s22 is presumably the squared cut-off — confirm.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] keeps the lanes switched off
; here and is OR-ed back into exec at BB6_70.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_70
s_cbranch_execz BB6_70
BB6_69: ; in Loop: Header=BB6_11 Depth=1
; Executed by the lanes that passed the v45 < threshold test in BB6_68.
; Reads: v45 (squared length), v27/v31/v35 (difference vector), v82 (fourth
; dword loaded in BB6_68), bit 11 of v19 and two LDS dwords addressed by v56.
; Updates: v8 and v5 (scalar sums), v69/v71/v78 and v28/v29/v30 (vector sums).
; NOTE(review): presumably a non-bonded pair interaction (1/r^6-based term into
; v8, erf-screened 1/r term into v5, forces into the vector sums) with the
; erf()/exp() builtins inlined by the compiler — confirm against the kernel source.
;
; Clamp v45 from below (0x34cd15ae is about 3.8e-7) before v_rsq_f32 uses it.
v_max_f32_e32 v45, 0x34cd15ae, v45
; Rational polynomial in v80 = v9 * v45 (v76 = v80^2) built from Horner-style
; v_madak steps: the denominator accumulates in v82, the numerator in v83.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * v82 is taken here, before v82 is reused as the denominator.
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
; v70 becomes 1.0 if bit 11 of v19 is set, else 0 (finished by v_cndmask below).
v_lshrrev_b32_e32 v70, 11, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Two more LDS dwords into v[76:77]; consumed after the s_waitcnt below.
ds_read_b64 v[76:77], v56 offset:192
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v70 * v80^2, v84 = v70 * v80^3
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
; Combine v84 with the LDS pair (scaled by v67 and v68) and add the result,
; gated by v70, into v8. 0xbe2aaaab is -1/6 and 0x3daaaaaa is about 1/12.
; v85 = v76 * v67 - v84 * (v68 * v77) is kept for the vector update at the end.
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 (the rational polynomial) + v70 * v79^3,
; interleaved with v45 = s18 * v45 * v79 and the v8 update.
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; Piecewise evaluation on v76 = v45 & s27, split at 0.84375 (s[8:9]),
; 1.25 (s[4:5]) and 0x4036db6e ~ 2.857 (vcc). v81 becomes the per-interval
; polynomial argument: v76^2, v76 - 1, or 1 / v76^2.
; NOTE(review): the thresholds and 0x3f58560b match the fdlibm erff()
; algorithm, and s27 is presumably the 0x7fffffff abs mask — confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Horner chains for every interval are evaluated unconditionally; the
; v_cndmask instructions pick the numerator (v82) and denominator (v83)
; candidates that belong to the lane's interval.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator v83 = 1 + v81 * (selected polynomial).
; v87 = 0x6f800000 (2^96) and v88 = 0x2f800000 (2^-32) guard each v_rcp_f32
; division: a divisor larger than 2^96 is pre-scaled by 2^-32 and the quotient
; is multiplied by the same factor afterwards (v89 here, v99/v83 further down).
v_mad_f32 v83, v81, v83, 1.0
; First exponential: v91 = -(v90^2) - 0.5625 with v90 = v45 & s50. Reduced as
; n = round(v91 * log2(e)) (0x3fb8aa3b), remainder via the split ln2 constants
; 0xbf317180 / 0xb717f7d1, a polynomial in v96, then n is added into the
; exponent field (v_lshlrev by 23 + v_add_i32). The result lands in v83.
; The last numerator chain (v86 -> v81) and the quotient v81 are interleaved.
; NOTE(review): s50 presumably masks off low mantissa bits of v45 — confirm.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range fix-ups for the first exponential: 0 when v91 is below 0xc2aeac4f
; (about -87.3), +inf (0x7f800000) when it is not below 0x42b17218 (about
; 88.7), and an unordered (NaN) v91 is passed through unchanged.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential of v90 = (v90 - v76) * (v90 + v76) + v96, where
; v96 = v81 * v89 is the rescaled polynomial quotient. Same reduction as
; above; the v_mac chain overwrites the constant registers v97/v100/v101/v102.
; The result lands in v82. v81 = v89 * v81 + 0x3f58560b is also formed here.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; Final selection of the piecewise result into v45:
;   v76 <  6.0 (s[10:11])      : 1 - v82 / v76, otherwise 1.0
;   v76 <  1.25 (s[4:5])       : v89 * v81 + 0x3f58560b
;   then the bits (v45 & s51) are OR-ed in
;   v76 <  0.84375 (s[8:9])    : v45 + v96 * v45
;   v76 <  0x31800000 (2^-28)  : 0.125 * (8 * v45 + 0x3f8375d4 * v45)
;   v45 unordered (NaN)        : v45 unchanged
; NOTE(review): s51 is presumably the 0x80000000 sign mask — confirm.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; v5 += v51 * (v79 * (v70 - v45) - s19 * v70)
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v77 - v85 * (v80 * v84) and v51 = -v45: the scalar that scales
; the difference vector, with both signs.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; v78/v71/v69 += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; v30/v29/v28 += (v35, v31, v27) * v45
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
BB6_70: ; %Flow1239
; in Loop: Header=BB6_11 Depth=1
; Join after the threshold branch in BB6_68: re-enable the lanes parked in s[38:39].
s_or_b64 exec, exec, s[38:39]
BB6_71: ; in Loop: Header=BB6_11 Depth=1
; Join point (re-enable the lanes held in s[34:35]), then gate the next
; section on bit 12 of v65.
; NOTE(review): v65 looks like a per-lane interaction bit mask — confirm.
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 12) & 1
v_lshrrev_b32_e32 v27, 12, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec; exec = exec & vcc
s_and_saveexec_b64 s[4:5], vcc
; s[34:35] = lanes just switched off, kept so they can be OR-ed back into
; exec at the join (branch target BB6_75).
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_75
; Skip the whole section when no lane has the bit set.
s_cbranch_execz BB6_75
BB6_72: ; in Loop: Header=BB6_11 Depth=1
; Load four dwords from LDS into v[79:82], build the difference vector
; against v72/v73/v74 and compare its squared length with a masked threshold.
; NOTE(review): presumably v[79:81] is a position and v82 a per-atom scalar
; (it is multiplied by v75 in BB6_73) — confirm against the kernel source.
s_mov_b32 m0, -1
; Two 64-bit LDS reads addressed by v55 (offset slots 64 and 65).
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; s[4:5] = ~(exec & s[0:1]) | (v61 != v23), later OR-ed with (exec & s[2:3]).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before touching v[79:82].
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v72
v_subrev_f32_e32 v27, v72, v79
; v45 = v31^2 + v27^2 + v35^2 (squared length of the difference vector)
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 on lanes selected by s[4:5], 0 elsewhere
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v74
v_subrev_f32_e32 v35, v74, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold = s22 on selected lanes, 0 on the others (those can never pass).
; NOTE(review): s22 is presumably the squared cut-off — confirm.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] keeps the lanes switched off
; here so the join (branch target BB6_74) can OR them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_74
s_cbranch_execz BB6_74
; BB6_73: pair computation, run only by lanes that passed the v45 < v51 test
; in the preceding block.
; Inputs read here: v27/v31/v35 (differences), v45 (their squared length),
; v82 (4th dword of the preceding LDS load), v9, v19, v37, v67, v68, v75 and
; scalars s18, s19, s23, s26, s27, s50, s51.
; Accumulators updated: v5, v8, v69/v71/v78 and v24/v25/v26.
; NOTE(review): the structure (clamped r^2, rsqrt, 1/r^6 terms, a rational
; polynomial, then a piecewise erf-like evaluation) looks like the Ewald
; electrostatics + Lennard-Jones force/energy body of a GROMACS nonbonded
; kernel -- confirm against the kernel source; register roles are inferred.
BB6_73: ; in Loop: Header=BB6_11 Depth=1
; Clamp v45 from below (0x34cd15ae ~= 3.82e-7) so v_rsq gets a positive input.
v_max_f32_e32 v45, 0x34cd15ae, v45
; With a = v9*v45 (v80) and b = a^2 (v76), two polynomials are evaluated with
; v_madak (D = S0*S1 + literal); their instructions are interleaved below:
;   v82 = 1.0 + b*(k*b + k) + a*(k*b + k)
;   v83 = ((k*b + k)*b + k)*b + k + a*((k*b + k)*b + k)
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
v_mul_f32_e32 v51, v75, v82 ; v51 = v75 * (4th loaded LDS dword), before v82 is reused
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_rsq_f32_e32 v79, v45 ; v79 = 1/sqrt(v45)
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; v70 becomes bit 12 of v19 as 1.0 / 0 (finished by the v_cndmask below);
; meanwhile a 64-bit LDS load from v56 + 256 bytes into v[76:77] is started.
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 12, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:256
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; Powers of v79: v80 = v79^2, v81 = v70*v79^4, v84 = v70*v79^6.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0) ; v[76:77] from LDS is valid from here on
; Combine the loaded pair with the inverse powers:
;   v85 = v76*v67 - v84*(v68*v77)
;   v8 += v70 * ( (v67*v76) * (-1/6)*(v70*v79^6 + s23)
;                 + (1/12) * (v84^2 + s26) * (v68*v77) )
; (0xbe2aaaab ~= -1/6, 0x3daaaaaa ~= 1/12)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 + v70*v79^3 ; v45 = s18 * v45 * v79 (~ s18*sqrt(v45)).
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation of a function of x = v45; result ends up in v45.
; v76 = x & s27. Lane masks on v76:
;   s[4:5]: v76 < 1.25 (0x3fa00000)      s[8:9]:   v76 < 0.84375 (0x3f580000)
;   vcc:    v76 < ~2.857 (0x4036db6e)    s[10:11]: v76 < 6.0 (0x40c00000)
;   s[12:13]: v76 < 2^-28 (0x31800000)
; Polynomial argument v81 = s[8:9] ? v76^2 : (s[4:5] ? v76 - 1 : 1/v76^2).
; NOTE(review): thresholds and structure match the classic single-precision
; erf() algorithm, presumably inlined from the OpenCL math library; s27 is
; presumably an abs mask and s51 a sign mask -- confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Numerator candidates accumulate in v82 (selected across ranges), denominator
; candidates in v83; v86 is the scratch chain for each alternative.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0 ; denominator = 1 + v81 * poly
; Guarded division numerator/denominator: if |denominator| > 2^96 (v87) it is
; pre-scaled by 2^-32 (v88) before v_rcp and the quotient is re-scaled (v89).
; Interleaved with it: v90 = x & s50, v91 = -0.5625 - v90^2, and the start of
; an exponential of v91 (n = int(v91*log2(e) +/- 0.5), 0x3fb8aa3b ~= log2(e)).
; NOTE(review): looks like an inlined expf(); confirm.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
; Range reduction with 0xbf317180 / 0xb717f7d1 (~ -ln2 split in two parts),
; a polynomial in the reduced argument squared, and a guarded division.
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86 ; add n<<23 to the float's bit pattern (scale by 2^n)
; Result fix-ups on the input v91: 0 if v91 < 0xc2aeac4f (~ -87.3), +inf
; (0x7f800000) unless v91 < 0x42b17218 (~ 88.72), v91 itself if it is NaN.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential, of v90 = (v76 + v90)*(v90 - v76) + quotient (v96), using
; the same constants; v97/v100/v101/v102 are consumed as accumulators here.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83 ; product of the two exponentials
; Final selection of the function value (applied in this order, later wins):
;   default            1 - (exp product) / v76, or 1.0 when v76 >= 6.0
;   s[4:5]  (<1.25)    v89*v81 + 0x3f58560b
;   then OR-in the bits of (x & s51)
;   s[8:9]  (<0.84375) x + x*quotient
;   s[12:13] (<2^-28)  0.125 * (8.0*x + 0x3f8375d4*x)
;   x is NaN           x itself
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; Accumulate:
;   v5 += v51 * ( v79*(v70 - f) - s19*v70 )        with f = value selected above
;   v45 = v51*v77 - v85*(v80*v84), v51 = its negation (computed separately)
;   (v69, v71, v78) += (v27, v31, v35) * v51
;   (v24, v25, v26) += (v27, v31, v35) * v45
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
; Control-flow join: OR back into exec the lanes that BB6_72 switched off
; (their set was stored in s[38:39] by its s_xor_b64).
BB6_74: ; %Flow1238
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[38:39]
; Join point followed by a per-lane test of bit 13 of v65. Lanes whose bit is
; clear are masked off; if no lane remains the whole section up to BB6_79 is
; skipped.
BB6_75: ; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[34:35] ; re-enable the lanes recorded in s[34:35]
v_lshrrev_b32_e32 v27, 13, v65 ; v27 = v65 >> 13
v_and_b32_e32 v27, 1, v27 ; v27 = bit 13 of v65
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec, exec &= vcc
s_xor_b64 s[34:35], exec, s[4:5] ; s[34:35] = lanes just switched off (OR-ed back at BB6_79)
; mask branch BB6_79
s_cbranch_execz BB6_79
; Loads four dwords from LDS into v79..v82, forms the three differences
; (v27, v31, v35) against v72/v73/v74 and their squared length v45, then keeps
; only lanes with v45 < s22 * mask, where mask is 1.0 or 0 depending on the
; scalar lane masks s[0:1], s[2:3] and the comparison v60 != v23.
; NOTE(review): this looks like a j-atom coordinate/charge fetch plus a
; cut-off / self-exclusion test of a GROMACS nonbonded kernel -- confirm
; against the kernel source.
BB6_76: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1 ; M0 = -1 ahead of the LDS access
; ds_read2_b64 offsets are in 8-byte units: reads v55+640 -> v[79:80] and
; v55+648 -> v[81:82].
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; Build the lane mask s[4:5] = ~(exec & s[0:1]) | (v60 != v23) | (exec & s[2:3])
; while the LDS load is in flight.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0) ; LDS data in v79..v82 is valid from here on
v_subrev_f32_e32 v31, v73, v80 ; v31 = v80 - v73
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79 ; v27 = v79 - v72
v_mul_f32_e32 v45, v31, v31
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5] ; v51 = lane set in s[4:5] ? 1.0 : 0
v_subrev_f32_e32 v35, v74, v81 ; v35 = v81 - v74
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35 ; v45 = v27^2 + v31^2 + v35^2
v_mul_f32_e32 v51, s22, v51 ; threshold = s22 or 0
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc ; keep only lanes with v45 < threshold
s_xor_b64 s[38:39], exec, s[4:5] ; s[38:39] = lanes switched off (OR-ed back at BB6_78)
; mask branch BB6_78
s_cbranch_execz BB6_78
; BB6_77: pair computation, run only by lanes that passed the v45 < v51 test
; in the preceding block.
; Inputs read here: v27/v31/v35 (differences), v45 (their squared length),
; v82 (4th dword of the preceding LDS load), v9, v19, v37, v67, v68, v75 and
; scalars s18, s19, s23, s26, s27, s50, s51.
; Accumulators updated: v5, v8, v69/v71/v78 and v20/v21/v22.
; NOTE(review): the structure (clamped r^2, rsqrt, 1/r^6 terms, a rational
; polynomial, then a piecewise erf-like evaluation) looks like the Ewald
; electrostatics + Lennard-Jones force/energy body of a GROMACS nonbonded
; kernel -- confirm against the kernel source; register roles are inferred.
BB6_77: ; in Loop: Header=BB6_11 Depth=1
; Clamp v45 from below (0x34cd15ae ~= 3.82e-7) so v_rsq gets a positive input.
v_max_f32_e32 v45, 0x34cd15ae, v45
; With a = v9*v45 (v80) and b = a^2 (v76), two polynomials are evaluated with
; v_madak (D = S0*S1 + literal); their instructions are interleaved below:
;   v82 = 1.0 + b*(k*b + k) + a*(k*b + k)
;   v83 = ((k*b + k)*b + k)*b + k + a*((k*b + k)*b + k)
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
v_mul_f32_e32 v51, v75, v82 ; v51 = v75 * (4th loaded LDS dword), before v82 is reused
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_rsq_f32_e32 v79, v45 ; v79 = 1/sqrt(v45)
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; v70 becomes bit 13 of v19 as 1.0 / 0 (finished by the v_cndmask below);
; meanwhile a 64-bit LDS load from v56 + 320 bytes into v[76:77] is started.
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 13, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:320
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; Powers of v79: v80 = v79^2, v81 = v70*v79^4, v84 = v70*v79^6.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0) ; v[76:77] from LDS is valid from here on
; Combine the loaded pair with the inverse powers:
;   v85 = v76*v67 - v84*(v68*v77)
;   v8 += v70 * ( (v67*v76) * (-1/6)*(v70*v79^6 + s23)
;                 + (1/12) * (v84^2 + s26) * (v68*v77) )
; (0xbe2aaaab ~= -1/6, 0x3daaaaaa ~= 1/12)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 + v70*v79^3 ; v45 = s18 * v45 * v79 (~ s18*sqrt(v45)).
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation of a function of x = v45; result ends up in v45.
; v76 = x & s27. Lane masks on v76:
;   s[4:5]: v76 < 1.25 (0x3fa00000)      s[8:9]:   v76 < 0.84375 (0x3f580000)
;   vcc:    v76 < ~2.857 (0x4036db6e)    s[10:11]: v76 < 6.0 (0x40c00000)
;   s[12:13]: v76 < 2^-28 (0x31800000)
; Polynomial argument v81 = s[8:9] ? v76^2 : (s[4:5] ? v76 - 1 : 1/v76^2).
; NOTE(review): thresholds and structure match the classic single-precision
; erf() algorithm, presumably inlined from the OpenCL math library; s27 is
; presumably an abs mask and s51 a sign mask -- confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Numerator candidates accumulate in v82 (selected across ranges), denominator
; candidates in v83; v86 is the scratch chain for each alternative.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0 ; denominator = 1 + v81 * poly
; Guarded division numerator/denominator: if |denominator| > 2^96 (v87) it is
; pre-scaled by 2^-32 (v88) before v_rcp and the quotient is re-scaled (v89).
; Interleaved with it: v90 = x & s50, v91 = -0.5625 - v90^2, and the start of
; an exponential of v91 (n = int(v91*log2(e) +/- 0.5), 0x3fb8aa3b ~= log2(e)).
; NOTE(review): looks like an inlined expf(); confirm.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
; Range reduction with 0xbf317180 / 0xb717f7d1 (~ -ln2 split in two parts),
; a polynomial in the reduced argument squared, and a guarded division.
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86 ; add n<<23 to the float's bit pattern (scale by 2^n)
; Result fix-ups on the input v91: 0 if v91 < 0xc2aeac4f (~ -87.3), +inf
; (0x7f800000) unless v91 < 0x42b17218 (~ 88.72), v91 itself if it is NaN.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential, of v90 = (v76 + v90)*(v90 - v76) + quotient (v96), using
; the same constants; v97/v100/v101/v102 are consumed as accumulators here.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83 ; product of the two exponentials
; Final selection of the function value (applied in this order, later wins):
;   default            1 - (exp product) / v76, or 1.0 when v76 >= 6.0
;   s[4:5]  (<1.25)    v89*v81 + 0x3f58560b
;   then OR-in the bits of (x & s51)
;   s[8:9]  (<0.84375) x + x*quotient
;   s[12:13] (<2^-28)  0.125 * (8.0*x + 0x3f8375d4*x)
;   x is NaN           x itself
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; Accumulate:
;   v5 += v51 * ( v79*(v70 - f) - s19*v70 )        with f = value selected above
;   v45 = v51*v77 - v85*(v80*v84), v51 = its negation (computed separately)
;   (v69, v71, v78) += (v27, v31, v35) * v51
;   (v20, v21, v22) += (v27, v31, v35) * v45
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v22, v35, v45, v22
v_mad_f32 v21, v31, v45, v21
v_mac_f32_e32 v20, v27, v45
; Control-flow join: OR back into exec the lanes that BB6_76 switched off
; (their set was stored in s[38:39] by its s_xor_b64).
BB6_78: ; %Flow1237
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[38:39]
; Join point followed by a per-lane test of bit 14 of v65. Lanes whose bit is
; clear are masked off; if no lane remains the whole section up to BB6_83 is
; skipped.
BB6_79: ; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[34:35] ; re-enable the lanes recorded in s[34:35]
v_lshrrev_b32_e32 v27, 14, v65 ; v27 = v65 >> 14
v_and_b32_e32 v27, 1, v27 ; v27 = bit 14 of v65
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec, exec &= vcc
s_xor_b64 s[34:35], exec, s[4:5] ; s[34:35] = lanes just switched off (OR-ed back at BB6_83)
; mask branch BB6_83
s_cbranch_execz BB6_83
; Loads four dwords from LDS into v79..v82, forms the three differences
; (v27, v31, v35) against v72/v73/v74 and their squared length v45, then keeps
; only lanes with v45 < s22 * mask, where mask is 1.0 or 0 depending on the
; scalar lane masks s[0:1], s[2:3] and the comparison v59 != v23.
; NOTE(review): this looks like a j-atom coordinate/charge fetch plus a
; cut-off / self-exclusion test of a GROMACS nonbonded kernel -- confirm
; against the kernel source.
BB6_80: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1 ; M0 = -1 ahead of the LDS access
; ds_read2_b64 offsets are in 8-byte units: reads v55+768 -> v[79:80] and
; v55+776 -> v[81:82].
ds_read2_b64 v[79:82], v55 offset0:96 offset1:97
; Build the lane mask s[4:5] = ~(exec & s[0:1]) | (v59 != v23) | (exec & s[2:3])
; while the LDS load is in flight.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v59, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0) ; LDS data in v79..v82 is valid from here on
v_subrev_f32_e32 v31, v73, v80 ; v31 = v80 - v73
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79 ; v27 = v79 - v72
v_mul_f32_e32 v45, v31, v31
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5] ; v51 = lane set in s[4:5] ? 1.0 : 0
v_subrev_f32_e32 v35, v74, v81 ; v35 = v81 - v74
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35 ; v45 = v27^2 + v31^2 + v35^2
v_mul_f32_e32 v51, s22, v51 ; threshold = s22 or 0
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc ; keep only lanes with v45 < threshold
s_xor_b64 s[38:39], exec, s[4:5] ; s[38:39] = lanes switched off (OR-ed back at BB6_82)
; mask branch BB6_82
s_cbranch_execz BB6_82
; BB6_81: pair computation, run only by lanes that passed the v45 < v51 test
; in the preceding block.
; Inputs read here: v27/v31/v35 (differences), v45 (their squared length),
; v82 (4th dword of the preceding LDS load), v9, v19, v37, v67, v68, v75 and
; scalars s18, s19, s23, s26, s27, s50, s51.
; Accumulators updated: v5, v8, v69/v71/v78 and v16/v17/v18.
; NOTE(review): the structure (clamped r^2, rsqrt, 1/r^6 terms, a rational
; polynomial, then a piecewise erf-like evaluation) looks like the Ewald
; electrostatics + Lennard-Jones force/energy body of a GROMACS nonbonded
; kernel -- confirm against the kernel source; register roles are inferred.
BB6_81: ; in Loop: Header=BB6_11 Depth=1
; Clamp v45 from below (0x34cd15ae ~= 3.82e-7) so v_rsq gets a positive input.
v_max_f32_e32 v45, 0x34cd15ae, v45
; With a = v9*v45 (v80) and b = a^2 (v76), two polynomials are evaluated with
; v_madak (D = S0*S1 + literal); their instructions are interleaved below:
;   v82 = 1.0 + b*(k*b + k) + a*(k*b + k)
;   v83 = ((k*b + k)*b + k)*b + k + a*((k*b + k)*b + k)
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
v_mul_f32_e32 v51, v75, v82 ; v51 = v75 * (4th loaded LDS dword), before v82 is reused
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_rsq_f32_e32 v79, v45 ; v79 = 1/sqrt(v45)
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; v70 becomes bit 14 of v19 as 1.0 / 0 (finished by the v_cndmask below);
; meanwhile a 64-bit LDS load from v56 + 384 bytes into v[76:77] is started.
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 14, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:384
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; Powers of v79: v80 = v79^2, v81 = v70*v79^4, v84 = v70*v79^6.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0) ; v[76:77] from LDS is valid from here on
; Combine the loaded pair with the inverse powers:
;   v85 = v76*v67 - v84*(v68*v77)
;   v8 += v70 * ( (v67*v76) * (-1/6)*(v70*v79^6 + s23)
;                 + (1/12) * (v84^2 + s26) * (v68*v77) )
; (0xbe2aaaab ~= -1/6, 0x3daaaaaa ~= 1/12)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 + v70*v79^3 ; v45 = s18 * v45 * v79 (~ s18*sqrt(v45)).
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation of a function of x = v45; result ends up in v45.
; v76 = x & s27. Lane masks on v76:
;   s[4:5]: v76 < 1.25 (0x3fa00000)      s[8:9]:   v76 < 0.84375 (0x3f580000)
;   vcc:    v76 < ~2.857 (0x4036db6e)    s[10:11]: v76 < 6.0 (0x40c00000)
;   s[12:13]: v76 < 2^-28 (0x31800000)
; Polynomial argument v81 = s[8:9] ? v76^2 : (s[4:5] ? v76 - 1 : 1/v76^2).
; NOTE(review): thresholds and structure match the classic single-precision
; erf() algorithm, presumably inlined from the OpenCL math library; s27 is
; presumably an abs mask and s51 a sign mask -- confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Numerator candidates accumulate in v82 (selected across ranges), denominator
; candidates in v83; v86 is the scratch chain for each alternative.
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0 ; denominator = 1 + v81 * poly
; Guarded division numerator/denominator: if |denominator| > 2^96 (v87) it is
; pre-scaled by 2^-32 (v88) before v_rcp and the quotient is re-scaled (v89).
; Interleaved with it: v90 = x & s50, v91 = -0.5625 - v90^2, and the start of
; an exponential of v91 (n = int(v91*log2(e) +/- 0.5), 0x3fb8aa3b ~= log2(e)).
; NOTE(review): looks like an inlined expf(); confirm.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
; Range reduction with 0xbf317180 / 0xb717f7d1 (~ -ln2 split in two parts),
; a polynomial in the reduced argument squared, and a guarded division.
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86 ; add n<<23 to the float's bit pattern (scale by 2^n)
; Result fix-ups on the input v91: 0 if v91 < 0xc2aeac4f (~ -87.3), +inf
; (0x7f800000) unless v91 < 0x42b17218 (~ 88.72), v91 itself if it is NaN.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential, of v90 = (v76 + v90)*(v90 - v76) + quotient (v96), using
; the same constants; v97/v100/v101/v102 are consumed as accumulators here.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83 ; product of the two exponentials
; Final selection of the function value (applied in this order, later wins):
;   default            1 - (exp product) / v76, or 1.0 when v76 >= 6.0
;   s[4:5]  (<1.25)    v89*v81 + 0x3f58560b
;   then OR-in the bits of (x & s51)
;   s[8:9]  (<0.84375) x + x*quotient
;   s[12:13] (<2^-28)  0.125 * (8.0*x + 0x3f8375d4*x)
;   x is NaN           x itself
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; Accumulate:
;   v5 += v51 * ( v79*(v70 - f) - s19*v70 )        with f = value selected above
;   v45 = v51*v77 - v85*(v80*v84), v51 = its negation (computed separately)
;   (v69, v71, v78) += (v27, v31, v35) * v51
;   (v16, v17, v18) += (v27, v31, v35) * v45
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v18, v35, v45, v18
v_mad_f32 v17, v31, v45, v17
v_mac_f32_e32 v16, v27, v45
; Control-flow join: OR back into exec the lanes that BB6_80 switched off
; (their set was stored in s[38:39] by its s_xor_b64).
BB6_82: ; %Flow1236
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[38:39]
; Join point followed by a per-lane test of bit 15 of v65. Lanes whose bit is
; clear are masked off; if no lane remains the code jumps to BB6_87.
; Unlike the earlier bit tests in this loop, the switched-off lane set is kept
; in s[12:13] rather than s[34:35].
BB6_83: ; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[34:35] ; re-enable the lanes recorded in s[34:35]
v_lshrrev_b32_e32 v27, 15, v65 ; v27 = v65 >> 15
v_and_b32_e32 v27, 1, v27 ; v27 = bit 15 of v65
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec, exec &= vcc
s_xor_b64 s[12:13], exec, s[4:5] ; s[12:13] = lanes just switched off
; mask branch BB6_87
s_cbranch_execz BB6_87
; Loads four dwords from LDS into v79..v82, forms the three differences
; (v23, v27, v31) against v72/v73/v74 and their squared length v35, then keeps
; only lanes with v35 < s22 * mask, where mask is 1.0 or 0 depending on the
; scalar lane masks s[0:1], s[2:3] and the comparison v38 != v23.
; Register assignment differs from the earlier load blocks in this loop: v23 is
; compared first and then overwritten with a difference, and the squared length
; lives in v35 instead of v45.
; NOTE(review): this looks like a j-atom coordinate/charge fetch plus a
; cut-off / self-exclusion test of a GROMACS nonbonded kernel -- confirm
; against the kernel source.
BB6_84: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1 ; M0 = -1 ahead of the LDS access
; ds_read2_b64 offsets are in 8-byte units: reads v55+896 -> v[79:80] and
; v55+904 -> v[81:82].
ds_read2_b64 v[79:82], v55 offset0:112 offset1:113
; Build the lane mask s[4:5] = ~(exec & s[0:1]) | (v38 != v23) | (exec & s[2:3])
; while the LDS load is in flight.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v38, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0) ; LDS data in v79..v82 is valid from here on
v_subrev_f32_e32 v27, v73, v80 ; v27 = v80 - v73
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v23, v72, v79 ; v23 = v79 - v72 (old v23 no longer needed)
v_mul_f32_e32 v35, v27, v27
v_cndmask_b32_e64 v45, 0, 1.0, s[4:5] ; v45 = lane set in s[4:5] ? 1.0 : 0
v_subrev_f32_e32 v31, v74, v81 ; v31 = v81 - v74
v_mac_f32_e32 v35, v23, v23
v_mac_f32_e32 v35, v31, v31 ; v35 = v23^2 + v27^2 + v31^2
v_mul_f32_e32 v45, s22, v45 ; threshold = s22 or 0
v_cmp_lt_f32_e32 vcc, v35, v45
s_and_saveexec_b64 s[4:5], vcc ; keep only lanes with v35 < threshold
s_xor_b64 s[34:35], exec, s[4:5] ; s[34:35] = lanes switched off
; mask branch BB6_86
s_cbranch_execz BB6_86
BB6_85: ; in Loop: Header=BB6_11 Depth=1
v_lshrrev_b32_e32 v51, 15, v19
v_and_b32_e32 v51, 1, v51
v_max_f32_e32 v35, 0x34cd15ae, v35
v_cmp_eq_u32_e32 vcc, 1, v51
v_rsq_f32_e32 v51, v35
s_mov_b32 m0, -1
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v45, v75, v82
v_mul_f32_e32 v74, v51, v51
v_mul_f32_e32 v72, v74, v74
v_mul_f32_e32 v75, v70, v72
ds_read_b64 v[72:73], v56 offset:448
v_mul_f32_e32 v77, v9, v35
v_mul_f32_e32 v79, v77, v77
v_mov_b32_e32 v80, 0x3a92b707
v_madak_f32_e32 v80, v80, v79, 0x3ded3cb2
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v68, v68, v73
v_mul_f32_e32 v73, v74, v75
v_mul_f32_e32 v76, v73, v68
v_mad_f32 v76, v72, v67, -v76
v_mul_f32_e32 v67, v67, v72
v_mad_f32 v72, v73, v73, s26
v_mul_f32_e32 v68, v72, v68
v_mad_f32 v72, v75, v74, s23
v_mov_b32_e32 v81, 0x3c739487
v_mul_f32_e32 v72, 0xbe2aaaab, v72
v_mul_f32_e32 v67, v67, v72
v_madak_f32_e32 v81, v81, v79, 0x3f01e2bc
v_mad_f32 v80, v80, v79, 1.0
v_mac_f32_e32 v67, 0x3daaaaaa, v68
v_mac_f32_e32 v80, v77, v81
v_mov_b32_e32 v81, 0xb2951928
v_mac_f32_e32 v8, v70, v67
v_rcp_f32_e32 v67, v80
v_madak_f32_e32 v81, v81, v79, 0xb85ffb93
v_mov_b32_e32 v82, 0x35c55945
v_madak_f32_e32 v82, v82, v79, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v79, 0xbc9ded90
v_madak_f32_e32 v82, v82, v79, 0x3d8eaf3b
v_madak_f32_e32 v79, v81, v79, 0xbf409397
v_mul_f32_e32 v35, s18, v35
v_mac_f32_e32 v79, v77, v82
v_mul_f32_e32 v67, v37, v67
v_mul_f32_e32 v67, v79, v67
v_mul_f32_e32 v68, v70, v74
v_mul_f32_e32 v35, v51, v35
v_mac_f32_e32 v67, v51, v68
v_and_b32_e32 v68, s27, v35
v_mov_b32_e32 v72, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v72, v68
v_mul_f32_e32 v72, v68, v68
v_rcp_f32_e32 v75, v72
v_add_f32_e32 v77, -1.0, v68
v_mov_b32_e32 v79, 0xc11d077e
v_mov_b32_e32 v80, 0xbd777f97
v_cndmask_b32_e64 v75, v75, v77, s[4:5]
v_mov_b32_e32 v77, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v77, v68
v_cndmask_b32_e64 v72, v75, v72, s[8:9]
v_mov_b32_e32 v75, 0xc3f1c275
v_madak_f32_e32 v79, v79, v72, 0xc2a2932b
v_madak_f32_e32 v75, v75, v72, 0xc480230b
v_mov_b32_e32 v77, 0xc1b38712
v_madak_f32_e32 v79, v72, v79, 0xc3389ae7
v_madak_f32_e32 v80, v80, v72, 0x40d23f7c
v_madak_f32_e32 v75, v72, v75, 0xc41f6441
v_madak_f32_e32 v77, v77, v72, 0x43ed43a7
v_madak_f32_e32 v79, v72, v79, 0xc322658c
v_madak_f32_e32 v80, v72, v80, 0x42d9451f
v_madak_f32_e32 v75, v72, v75, 0xc320a2ea
v_madak_f32_e32 v77, v72, v77, 0x451f90ce
v_madak_f32_e32 v79, v72, v79, 0xc2798057
v_madak_f32_e32 v80, v72, v80, 0x43d6810b
v_madak_f32_e32 v75, v72, v75, 0xc18e104b
v_madak_f32_e32 v77, v72, v77, 0x4547fdbb
v_madak_f32_e32 v79, v72, v79, 0xc128f022
v_madak_f32_e32 v80, v72, v80, 0x442158c9
v_madak_f32_e32 v75, v72, v75, 0xbf4c9dd4
v_madak_f32_e32 v77, v72, v77, 0x44c01759
v_madak_f32_e32 v79, v72, v79, 0xbf31a0b7
v_madak_f32_e32 v80, v72, v80, 0x43d9486f
v_mov_b32_e32 v81, 0x4036db6e
v_madak_f32_e32 v77, v72, v77, 0x43a2e571
v_madak_f32_e32 v80, v72, v80, 0x4309a863
v_madak_f32_e32 v75, v72, v75, 0xbc21a092
v_madak_f32_e32 v79, v72, v79, 0xbc21a093
v_cmp_gt_f32_e32 vcc, v81, v68
v_cndmask_b32_e32 v75, v75, v79, vcc
v_mov_b32_e32 v79, 0xbb0df9c0
v_madak_f32_e32 v77, v72, v77, 0x41f2b459
v_madak_f32_e32 v80, v72, v80, 0x419d35ce
v_madak_f32_e32 v79, v79, v72, 0x3d1151b3
v_cndmask_b32_e32 v77, v77, v80, vcc
v_mov_b32_e32 v80, 0x3c445aa3
v_madak_f32_e32 v79, v72, v79, 0xbde31cc2
v_madak_f32_e32 v80, v80, v72, 0x3c5f6e13
v_madak_f32_e32 v79, v72, v79, 0x3ea2fe54
v_madak_f32_e32 v80, v72, v80, 0x3e013307
v_madak_f32_e32 v79, v72, v79, 0xbebe9208
v_madak_f32_e32 v80, v72, v80, 0x3d931ae7
v_madak_f32_e32 v79, v72, v79, 0x3ed46805
v_madak_f32_e32 v80, v72, v80, 0x3f0a5785
v_madak_f32_e32 v79, v72, v79, 0xbb1acdc6
v_madak_f32_e32 v80, v72, v80, 0x3dd9f331
v_cndmask_b32_e64 v75, v75, v79, s[4:5]
v_mov_b32_e32 v79, 0xb7c756b1
v_cndmask_b32_e64 v77, v77, v80, s[4:5]
v_mov_b32_e32 v80, 0xb684e21a
v_madak_f32_e32 v79, v79, v72, 0xbbbd1489
v_madak_f32_e32 v80, v80, v72, 0x390aee49
v_madak_f32_e32 v79, v72, v79, 0xbce9528f
v_madak_f32_e32 v80, v72, v80, 0x3ba68116
v_madak_f32_e32 v79, v72, v79, 0xbea66beb
v_madak_f32_e32 v80, v72, v80, 0x3d852a63
v_madak_f32_e32 v79, v72, v79, 0x3e0375d4
v_madak_f32_e32 v80, v72, v80, 0x3ecbbbce
v_cndmask_b32_e64 v77, v77, v80, s[8:9]
v_cndmask_b32_e64 v75, v75, v79, s[8:9]
v_and_b32_e32 v79, s50, v35
v_mov_b32_e32 v80, 0xbf100000
v_mad_f32 v80, v79, -v79, v80
v_cmp_gt_f32_e64 s[10:11], 0, v80
v_cndmask_b32_e64 v81, 0.5, -0.5, s[10:11]
v_mov_b32_e32 v82, 0x3fb8aa3b
v_mac_f32_e32 v81, v82, v80
v_cvt_i32_f32_e32 v81, v81
v_mov_b32_e32 v86, 0xbf317180
v_mov_b32_e32 v88, 0xb717f7d1
v_mov_b32_e32 v91, 0xb5ddea0e
v_cvt_f32_i32_e32 v85, v81
v_mov_b32_e32 v92, 0x3331bb4c
v_mov_b32_e32 v94, 0x388ab355
v_mov_b32_e32 v95, 0xbb360b61
v_mad_f32 v87, v86, v85, v80
v_mad_f32 v89, v88, v85, v87
v_mul_f32_e32 v90, v89, v89
v_mad_f32 v93, v92, v90, v91
v_mad_f32 v93, v93, v90, v94
v_mad_f32 v93, v93, v90, v95
v_mov_b32_e32 v96, 0x3e2aaaab
v_mad_f32 v93, v93, v90, v96
v_mad_f32 v90, -v90, v93, v89
v_mad_f32 v72, v72, v77, 1.0
v_mov_b32_e32 v77, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v72|, v77
v_mov_b32_e32 v83, 0x2f800000
v_sub_f32_e32 v93, 2.0, v90
v_cndmask_b32_e32 v84, 1.0, v83, vcc
v_cmp_gt_f32_e64 vcc, |v93|, v77
v_cndmask_b32_e32 v97, 1.0, v83, vcc
v_mul_f32_e32 v93, v97, v93
v_rcp_f32_e32 v93, v93
v_mul_f32_e32 v72, v84, v72
v_mul_f32_e32 v89, v90, v89
v_rcp_f32_e32 v72, v72
v_mul_f32_e32 v89, v93, v89
v_mul_f32_e32 v89, v89, v97
v_mad_f32 v85, -v85, v88, -v89
v_subrev_f32_e32 v85, v87, v85
v_mul_f32_e32 v72, v72, v75
v_mul_f32_e32 v75, v72, v84
v_subrev_f32_e32 v87, v68, v79
v_add_f32_e32 v79, v68, v79
v_sub_f32_e32 v85, 1.0, v85
v_lshlrev_b32_e32 v81, 23, v81
v_add_i32_e32 v81, vcc, v85, v81
v_mad_f32 v79, v79, v87, v75
v_cmp_gt_f32_e32 vcc, 0, v79
v_cndmask_b32_e64 v87, 0.5, -0.5, vcc
v_mac_f32_e32 v87, v82, v79
v_cvt_i32_f32_e32 v82, v87
v_mov_b32_e32 v85, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v80, v85
v_mov_b32_e32 v87, 0x42b17218
v_cvt_f32_i32_e32 v89, v82
v_cndmask_b32_e32 v81, 0, v81, vcc
v_cmp_lt_f32_e32 vcc, v80, v87
v_mov_b32_e32 v90, 0x7f800000
v_mad_f32 v86, v86, v89, v79
v_mad_f32 v93, v88, v89, v86
v_mul_f32_e32 v97, v93, v93
v_mac_f32_e32 v91, v92, v97
v_mac_f32_e32 v94, v91, v97
v_mac_f32_e32 v95, v94, v97
v_mac_f32_e32 v96, v95, v97
v_mad_f32 v91, -v97, v96, v93
v_sub_f32_e32 v92, 2.0, v91
v_cndmask_b32_e32 v81, v90, v81, vcc
v_cmp_gt_f32_e64 vcc, |v92|, v77
v_cndmask_b32_e32 v94, 1.0, v83, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v81, v80, vcc
v_mul_f32_e32 v81, v91, v93
v_mul_f32_e32 v81, v92, v81
v_mul_f32_e32 v81, v81, v94
v_mad_f32 v81, -v89, v88, -v81
v_subrev_f32_e32 v81, v86, v81
v_sub_f32_e32 v81, 1.0, v81
v_lshlrev_b32_e32 v82, 23, v82
v_add_i32_e32 v81, vcc, v81, v82
v_cmp_ge_f32_e32 vcc, v79, v85
v_cndmask_b32_e32 v81, 0, v81, vcc
v_cmp_lt_f32_e64 s[10:11], v79, v87
v_cndmask_b32_e64 v81, v90, v81, s[10:11]
v_cmp_u_f32_e32 vcc, v79, v79
v_cndmask_b32_e32 v79, v81, v79, vcc
v_cmp_gt_f32_e64 vcc, |v68|, v77
v_cndmask_b32_e32 v77, 1.0, v83, vcc
v_mul_f32_e32 v81, v77, v68
v_rcp_f32_e32 v81, v81
v_mul_f32_e32 v79, v79, v80
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e32 vcc, v80, v68
v_mov_b32_e32 v80, 0x31800000
v_mul_f32_e32 v79, v81, v79
v_cmp_gt_f32_e64 s[10:11], v80, v68
v_mad_f32 v68, -v77, v79, 1.0
v_cndmask_b32_e32 v68, 1.0, v68, vcc
v_madak_f32_e32 v72, v84, v72, 0x3f58560b
v_cndmask_b32_e64 v68, v68, v72, s[4:5]
v_and_b32_e32 v72, s51, v35
v_or_b32_e32 v68, v72, v68
v_mad_f32 v72, v75, v35, v35
v_cndmask_b32_e64 v68, v68, v72, s[8:9]
v_mul_f32_e32 v72, 0x3f8375d4, v35
v_mac_f32_e32 v72, 0x41000000, v35
v_mul_f32_e32 v72, 0x3e000000, v72
v_cndmask_b32_e64 v68, v68, v72, s[10:11]
v_cmp_u_f32_e32 vcc, v35, v35
v_cndmask_b32_e32 v35, v68, v35, vcc
v_subrev_f32_e32 v35, v35, v70
v_mul_f32_e32 v68, s19, v70
v_mad_f32 v35, v51, v35, -v68
v_mul_f32_e32 v51, v74, v73
v_mac_f32_e32 v5, v35, v45
v_mul_f32_e32 v35, v76, v51
v_mad_f32 v35, v45, v67, -v35
v_mul_f32_e32 v45, v67, v45
v_mad_f32 v45, v51, v76, -v45
v_mad_f32 v78, v31, v45, v78
v_mad_f32 v71, v27, v45, v71
v_mac_f32_e32 v69, v23, v45
v_mad_f32 v15, v31, v35, v15
v_mad_f32 v14, v27, v35, v14
v_mac_f32_e32 v13, v23, v35
BB6_86: ; %Flow1235
; in Loop: Header=BB6_11 Depth=1
; Control-flow join: OR the lane mask held in s[34:35] back into exec.
; (The instruction that produced s[34:35] lies before this excerpt.)
s_or_b64 exec, exec, s[34:35]
BB6_87: ; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes recorded in s[12:13], write v69, v71 and v78 to the DS
; (local data share) addresses held in v6, v7 and v12, then restrict
; execution to the lanes where v2 < 3 for the blocks up to BB6_93.
s_or_b64 exec, exec, s[12:13]
s_mov_b32 m0, -1
v_cmp_gt_i32_e32 vcc, 3, v2 ; vcc = (3 > v2), signed
ds_write_b32 v6, v69
ds_write_b32 v7, v71
ds_write_b32 v12, v78
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[4:5], exec, s[4:5] ; s[4:5] = lanes just switched off (restored at BB6_93)
s_waitcnt lgkmcnt(0) ; wait for the DS writes above to complete
; mask branch BB6_93
s_cbranch_execz BB6_93 ; no lane has v2 < 3: skip the whole region
BB6_88: ; in Loop: Header=BB6_11 Depth=1
; Form the DS byte address v31 = s15 + 4*(v11 + 64*v2), read the first
; dword from it into v23, and enter BB6_89 only for lanes where
; (v11 | 1) < (v11 + 8).
v_lshlrev_b32_e32 v27, 6, v2 ; v27 = v2 * 64 (reused in BB6_89)
v_add_i32_e32 v23, vcc, v11, v27
v_lshlrev_b32_e32 v23, 2, v23 ; dword index -> byte offset
v_add_i32_e32 v31, vcc, s15, v23
s_mov_b32 m0, -1
ds_read_b32 v23, v31
v_add_i32_e32 v35, vcc, 8, v11
v_or_b32_e32 v45, 1, v11
v_cmp_lt_i32_e32 vcc, v45, v35
s_and_saveexec_b64 s[8:9], vcc ; s[8:9] = old exec; exec &= vcc
s_xor_b64 s[8:9], exec, s[8:9] ; s[8:9] = lanes switched off (restored at BB6_90)
s_waitcnt lgkmcnt(0) ; v23 is valid from here on
; mask branch BB6_90
s_cbranch_execz BB6_90
BB6_89: ; in Loop: Header=BB6_11 Depth=1
; Sum seven further DS dwords into v23 (which already holds the dword read
; in BB6_88), giving an eight-term float sum per active lane.
s_mov_b32 m0, -1
ds_read2_b32 v[67:68], v31 offset0:1 offset1:2 ; dwords at v31+4 and v31+8
; Second base address: v27 = s15 + 4*((v11 | 3) + 64*v2); v27 enters as 64*v2.
v_or_b32_e32 v35, 3, v11
v_add_i32_e32 v27, vcc, v35, v27
v_lshlrev_b32_e32 v27, 2, v27
ds_read2_b32 v[69:70], v31 offset0:3 offset1:4 ; dwords at v31+12 and v31+16
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v67
v_add_i32_e32 v27, vcc, s15, v27
v_add_f32_e32 v23, v68, v23
ds_read2_b32 v[67:68], v27 offset0:2 offset1:3 ; dwords at v27+8 and v27+12
ds_read_b32 v31, v31 offset:28 ; dword at v31+28; v31 is overwritten with the data
v_add_f32_e32 v23, v69, v23
v_add_f32_e32 v23, v70, v23
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v67, v23
v_add_f32_e32 v23, v68, v23
v_add_f32_e32 v23, v31, v23
BB6_90: ; %._crit_edge.i72
; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes saved in s[8:9], compute the element index v66*3 + v2,
; load the current 32-bit value of that element into v69, and build its
; 64-bit address in v[66:67] for the compare-and-swap loop in BB6_91.
s_or_b64 exec, exec, s[8:9]
v_mul_lo_i32 v27, v66, 3
v_mov_b32_e32 v31, s29
; Buffer descriptor s[8:11] = { s[28:29], s[46:47] } for the load below.
s_mov_b64 s[8:9], s[28:29]
s_mov_b64 s[10:11], s[46:47]
v_add_i32_e32 v66, vcc, v27, v2
v_ashrrev_i32_e32 v67, 31, v66 ; sign-extend the index to 64 bits
v_lshl_b64 v[68:69], v[66:67], 2 ; byte offset = index * 4
; v[66:67] = s[28:29] + byte offset (64-bit add with carry through vcc).
v_add_i32_e32 v66, vcc, s28, v68
v_addc_u32_e32 v67, vcc, v69, v31, vcc
buffer_load_dword v69, v[68:69], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0 ; s[8:9] now becomes the "lanes finished" mask for BB6_91
s_waitcnt vmcnt(0)
BB6_91: ; Parent Loop BB6_11 Depth=1
; => This Inner Loop Header: Depth=2
; Compare-and-swap retry loop that adds the float in v23 to the 32-bit
; memory word addressed by v[66:67].
;   v69     = value last observed in memory (the expected/compare value)
;   v68     = v23 + v69, the value to store
;   v[70:71]= { swap value, compare value } for the atomic; with glc the
;             pre-operation memory value is returned in v70
;   s[8:9]  = mask of lanes whose swap has succeeded (zeroed in BB6_90)
; A lane leaves the loop once the returned value equals the value it
; expected; otherwise it retries with the freshly returned value.
v_add_f32_e32 v68, v23, v69
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v70, v68
buffer_atomic_cmpswap v[70:71], v[66:67], s[44:47], 0 addr64 glc
; The original listing wrote -1 to v27 immediately before this write; that
; store was dead (overwritten here with no read in between) and is dropped.
; NOTE(review): this remaining write also looks unused inside the loop, but
; v27's liveness after the loop is not visible from here, so it is kept.
v_mov_b32_e32 v27, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v70, v69 ; swap succeeded where returned == expected
s_or_b64 s[8:9], vcc, s[8:9] ; accumulate finished lanes
v_mov_b32_e32 v69, v70 ; retry against the value actually seen
s_andn2_b64 exec, exec, s[8:9] ; drop finished lanes from exec
s_cbranch_execnz BB6_91
; BB#92: ; %Flow1233
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[8:9] ; re-enable the lanes that completed the loop
BB6_93: ; %Flow1234
; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes that were switched off by the v2 < 3 test in BB6_87.
s_or_b64 exec, exec, s[4:5]
BB6_94: ; %Flow1243
; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes held in s[30:31], then keep active only the lanes whose
; v65 has at least one of bits 16..23 set; if none do, skip to BB6_134.
s_or_b64 exec, exec, s[30:31]
v_and_b32_e32 v23, 0xff0000, v65
v_cmp_ne_u32_e32 vcc, 0, v23
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[30:31], exec, s[4:5] ; s[30:31] = lanes switched off here
; mask branch BB6_134
s_cbranch_execz BB6_134
BB6_95: ; in Loop: Header=BB6_11 Depth=1
; Read an index from DS (v54 + 8) into v23, form the element index
; v23*8 + v1, and fetch two records for it: a 16-byte record into v[72:75]
; and an 8-byte record into v[67:68]. Then zero v69, v71 and v78 and enter
; BB6_96 only for lanes where bit 16 of v65 is set.
; NOTE(review): presumably the 16-byte record is a position+charge float4
; and the 8-byte record a pair of LJ parameters - confirm against the
; kernel source.
s_mov_b32 m0, -1
ds_read_b32 v23, v54 offset:8
; Buffer descriptor s[8:11] = { s[32:33], s[46:47] } for the 16-byte load.
s_mov_b64 s[8:9], s[32:33]
s_mov_b64 s[10:11], s[46:47]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v27, 3, v23 ; v27 = v23 * 8
v_add_i32_e32 v66, vcc, v27, v1
v_ashrrev_i32_e32 v67, 31, v66 ; sign-extend the index to 64 bits
v_lshl_b64 v[68:69], v[66:67], 4 ; byte offset = index * 16
buffer_load_dwordx4 v[72:75], v[68:69], s[8:11], 0 addr64
v_lshl_b64 v[70:71], v[66:67], 3 ; byte offset = index * 8
s_mov_b64 s[8:9], s[36:37] ; switch the descriptor base to s[36:37]
buffer_load_dwordx2 v[67:68], v[70:71], s[8:11], 0 addr64
v_lshrrev_b32_e32 v27, 16, v65
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v27 ; v27 = bit 16 of v65
v_cmp_eq_u32_e32 vcc, 1, v27
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v78, v69
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[34:35], exec, s[4:5] ; s[34:35] = lanes switched off (restored at BB6_99)
s_waitcnt vmcnt(0) ; both buffer loads have landed
; mask branch BB6_99
s_cbranch_execz BB6_99
BB6_96: ; in Loop: Header=BB6_11 Depth=1
; Read four dwords v[79:82] from DS at v55, form the component differences
; against v72..v74 and their squared length v45, and enter BB6_97 only for
; lanes where v45 < s22 * m, with m = 1.0 or 0 taken from the scalar mask
; built below. v69, v71 and v78 are zeroed for lanes that skip BB6_97.
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset1:1
; Mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v41 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v27, v73, v80 ; v27 = v80 - v73
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v31, v72, v79 ; v31 = v79 - v72
v_mul_f32_e32 v45, v27, v27
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5] ; m = mask ? 1.0 : 0
v_subrev_f32_e32 v35, v74, v81 ; v35 = v81 - v74
v_mac_f32_e32 v45, v31, v31
v_mov_b32_e32 v69, 0
v_mac_f32_e32 v45, v35, v35 ; v45 = v27^2 + v31^2 + v35^2
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v78, v69
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[38:39], exec, s[4:5] ; s[38:39] = lanes switched off (restored at BB6_98)
; mask branch BB6_98
s_cbranch_execz BB6_98
BB6_97: ; in Loop: Header=BB6_11 Depth=1
; Per-lane arithmetic for the pair selected in BB6_96. Inputs used here:
; v45 (squared length), v27/v31/v35 (component differences), v67/v68 and
; v75 (loaded in BB6_95), v82 (fourth dword read in BB6_96), v9, v37, v19
; and scalars s18, s19, s23, s26, s27, s50, s51.
; Results: v8 and v5 are accumulated; v48/v49/v50 are decremented by
; v51 * (v31, v27, v35); v69/v71/v78 receive v45 * -(v31, v27, v35).
; NOTE(review): this looks like a Lennard-Jones plus Ewald-corrected
; Coulomb force/energy evaluation - confirm against the kernel source.
v_max_f32_e32 v45, 0x34cd15ae, v45 ; clamp v45 from below (~3.8e-7)
; Two even/odd polynomials in v69 = v9 * v45: v71 and v76 (used as v76 / v71 below).
v_mul_f32_e32 v69, v9, v45
v_mul_f32_e32 v70, v69, v69
v_mov_b32_e32 v71, 0x3a92b707
v_madak_f32_e32 v71, v71, v70, 0x3ded3cb2
v_mov_b32_e32 v76, 0x3c739487
v_madak_f32_e32 v76, v76, v70, 0x3f01e2bc
v_mad_f32 v71, v71, v70, 1.0
v_mac_f32_e32 v71, v69, v76
v_mov_b32_e32 v76, 0xb2951928
v_madak_f32_e32 v76, v76, v70, 0xb85ffb93
v_mov_b32_e32 v77, 0x35c55945
v_madak_f32_e32 v77, v77, v70, 0x3a83ca0c
v_madak_f32_e32 v76, v76, v70, 0xbc9ded90
v_madak_f32_e32 v77, v77, v70, 0x3d8eaf3b
v_madak_f32_e32 v76, v76, v70, 0xbf409397
v_mac_f32_e32 v76, v69, v77
v_rsq_f32_e32 v77, v45 ; v77 = 1/sqrt(v45)
; v78 = 1.0 if bit 16 of v19 is set, else 0.
v_lshrrev_b32_e32 v69, 16, v19
v_and_b32_e32 v69, 1, v69
v_cmp_eq_u32_e32 vcc, 1, v69
v_mul_f32_e32 v79, v77, v77 ; v79 = v77^2
s_mov_b32 m0, -1
v_cndmask_b32_e64 v78, 0, 1.0, vcc
v_mul_f32_e32 v69, v79, v79
v_mul_f32_e32 v80, v78, v69
ds_read_b64 v[69:70], v56
v_mul_f32_e32 v81, v79, v80 ; v81 = v78 * v79^3
v_mad_f32 v80, v80, v79, s23
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v83, v81, v81, s26
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v70, v68, v70
v_mul_f32_e32 v82, v81, v70
v_mad_f32 v82, v69, v67, -v82
v_mul_f32_e32 v69, v67, v69
v_mul_f32_e32 v80, 0xbe2aaaab, v80 ; * -1/6
v_mul_f32_e32 v70, v83, v70
v_mul_f32_e32 v69, v69, v80
v_mac_f32_e32 v69, 0x3daaaaaa, v70 ; + (~1/12) * v70
v_mac_f32_e32 v8, v78, v69 ; v8 += v78 * v69
v_rcp_f32_e32 v69, v71
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v70, v78, v79
v_mul_f32_e32 v45, v77, v45 ; v45 = s18 * v45 * rsq(v45)
v_mul_f32_e32 v69, v37, v69
v_mul_f32_e32 v69, v76, v69 ; v69 = v37 * v76 / v71
v_mac_f32_e32 v69, v77, v70
; Branch-free piecewise evaluation of a function of v45; the result
; replaces v45 at the final v_cndmask chain. v70 = v45 & s27 is the
; argument tested against 1.25 (s[4:5]), 0.84375 (s[8:9]), ~2.857 (vcc),
; 6.0 (s[10:11]) and 2^-28 (s[12:13]).
; NOTE(review): thresholds and constants (e.g. 0x3f58560b) match an
; fdlibm-style erf(); s27/s50/s51 are presumably abs/truncation/sign
; masks - confirm.
v_and_b32_e32 v70, s27, v45
v_mov_b32_e32 v71, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v71, v70
v_mul_f32_e32 v71, v70, v70
v_rcp_f32_e32 v76, v71
v_add_f32_e32 v80, -1.0, v70
v_mov_b32_e32 v83, 0xbd777f97
v_mov_b32_e32 v84, 0xbf100000
v_cndmask_b32_e64 v76, v76, v80, s[4:5]
v_mov_b32_e32 v80, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v80, v70
v_cndmask_b32_e64 v71, v76, v71, s[8:9] ; v71 = polynomial argument for the selected interval
v_mov_b32_e32 v80, 0xc11d077e
v_mov_b32_e32 v76, 0x4036db6e
v_madak_f32_e32 v80, v80, v71, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v76, v70
v_mov_b32_e32 v76, 0xc3f1c275
v_madak_f32_e32 v76, v76, v71, 0xc480230b
v_madak_f32_e32 v80, v71, v80, 0xc3389ae7
v_madak_f32_e32 v76, v71, v76, 0xc41f6441
v_madak_f32_e32 v80, v71, v80, 0xc322658c
v_madak_f32_e32 v76, v71, v76, 0xc320a2ea
v_madak_f32_e32 v80, v71, v80, 0xc2798057
v_madak_f32_e32 v76, v71, v76, 0xc18e104b
v_madak_f32_e32 v80, v71, v80, 0xc128f022
v_madak_f32_e32 v76, v71, v76, 0xbf4c9dd4
v_madak_f32_e32 v80, v71, v80, 0xbf31a0b7
v_madak_f32_e32 v76, v71, v76, 0xbc21a092
v_madak_f32_e32 v80, v71, v80, 0xbc21a093
v_madak_f32_e32 v83, v83, v71, 0x40d23f7c
v_cndmask_b32_e32 v76, v76, v80, vcc
v_mov_b32_e32 v80, 0xc1b38712
v_madak_f32_e32 v80, v80, v71, 0x43ed43a7
v_madak_f32_e32 v83, v71, v83, 0x42d9451f
v_madak_f32_e32 v80, v71, v80, 0x451f90ce
v_madak_f32_e32 v83, v71, v83, 0x43d6810b
v_madak_f32_e32 v80, v71, v80, 0x4547fdbb
v_madak_f32_e32 v83, v71, v83, 0x442158c9
v_madak_f32_e32 v80, v71, v80, 0x44c01759
v_madak_f32_e32 v83, v71, v83, 0x43d9486f
v_madak_f32_e32 v80, v71, v80, 0x43a2e571
v_madak_f32_e32 v83, v71, v83, 0x4309a863
v_madak_f32_e32 v80, v71, v80, 0x41f2b459
v_madak_f32_e32 v83, v71, v83, 0x419d35ce
v_cndmask_b32_e32 v80, v80, v83, vcc
v_mov_b32_e32 v83, 0xbb0df9c0
v_madak_f32_e32 v83, v83, v71, 0x3d1151b3
v_madak_f32_e32 v83, v71, v83, 0xbde31cc2
v_madak_f32_e32 v83, v71, v83, 0x3ea2fe54
v_madak_f32_e32 v83, v71, v83, 0xbebe9208
v_madak_f32_e32 v83, v71, v83, 0x3ed46805
v_madak_f32_e32 v83, v71, v83, 0xbb1acdc6
v_cndmask_b32_e64 v76, v76, v83, s[4:5]
v_mov_b32_e32 v83, 0x3c445aa3
v_madak_f32_e32 v83, v83, v71, 0x3c5f6e13
v_madak_f32_e32 v83, v71, v83, 0x3e013307
v_madak_f32_e32 v83, v71, v83, 0x3d931ae7
v_madak_f32_e32 v83, v71, v83, 0x3f0a5785
v_madak_f32_e32 v83, v71, v83, 0x3dd9f331
v_cndmask_b32_e64 v80, v80, v83, s[4:5]
v_mov_b32_e32 v83, 0xb684e21a
v_madak_f32_e32 v83, v83, v71, 0x390aee49
v_madak_f32_e32 v83, v71, v83, 0x3ba68116
v_madak_f32_e32 v83, v71, v83, 0x3d852a63
v_madak_f32_e32 v83, v71, v83, 0x3ecbbbce
v_cndmask_b32_e64 v80, v80, v83, s[8:9]
v_mov_b32_e32 v83, 0xb7c756b1
v_madak_f32_e32 v83, v83, v71, 0xbbbd1489
v_madak_f32_e32 v83, v71, v83, 0xbce9528f
v_madak_f32_e32 v83, v71, v83, 0xbea66beb
v_madak_f32_e32 v83, v71, v83, 0x3e0375d4
v_mad_f32 v71, v71, v80, 1.0 ; denominator: 1 + v71 * (selected polynomial)
; First exponential-style evaluation, on v84 = -(v80*v80) - 0.5625 with
; v80 = v45 & s50: round v84 * log2(e) to an integer (v85/v90), reduce
; with the two-part constant 0xbf317180 / 0xb717f7d1, evaluate a
; polynomial, and add the integer into the exponent field (shift by 23).
v_and_b32_e32 v80, s50, v45
v_mad_f32 v84, v80, -v80, v84
v_cmp_gt_f32_e32 vcc, 0, v84
v_cndmask_b32_e64 v85, 0.5, -0.5, vcc
v_mov_b32_e32 v86, 0x3fb8aa3b
v_mac_f32_e32 v85, v86, v84
v_cvt_i32_f32_e32 v85, v85
v_mov_b32_e32 v91, 0xbf317180
v_mov_b32_e32 v93, 0xb717f7d1
v_mov_b32_e32 v96, 0xb5ddea0e
v_cvt_f32_i32_e32 v90, v85
v_mov_b32_e32 v97, 0x3331bb4c
v_mov_b32_e32 v99, 0x388ab355
v_mov_b32_e32 v100, 0xbb360b61
v_mad_f32 v92, v91, v90, v84
v_mad_f32 v94, v93, v90, v92
v_mul_f32_e32 v95, v94, v94
v_mad_f32 v98, v97, v95, v96
v_mad_f32 v98, v98, v95, v99
v_mad_f32 v98, v98, v95, v100
v_mov_b32_e32 v101, 0x3e2aaaab
v_mad_f32 v98, v98, v95, v101
v_mad_f32 v95, -v95, v98, v94
; Divisions are done as rcp with pre/post scaling: a divisor with
; magnitude above 0x6f800000 (2^96) is multiplied by 0x2f800000 (2^-32)
; before v_rcp and the quotient is scaled by the same factor afterwards.
v_mov_b32_e32 v87, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v71|, v87
v_mov_b32_e32 v88, 0x2f800000
v_sub_f32_e32 v98, 2.0, v95
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e64 vcc, |v98|, v87
v_mul_f32_e32 v71, v89, v71
v_cndmask_b32_e32 v102, 1.0, v88, vcc
v_mul_f32_e32 v98, v102, v98
v_rcp_f32_e32 v71, v71
v_rcp_f32_e32 v98, v98
v_cndmask_b32_e64 v76, v76, v83, s[8:9]
v_lshlrev_b32_e32 v83, 23, v85
v_mul_f32_e32 v71, v71, v76
v_mul_f32_e32 v76, v95, v94
v_mul_f32_e32 v76, v98, v76
v_mul_f32_e32 v76, v76, v102
v_mad_f32 v76, -v90, v93, -v76
v_subrev_f32_e32 v76, v92, v76
v_sub_f32_e32 v76, 1.0, v76
v_add_i32_e32 v76, vcc, v76, v83 ; integer add into the exponent bits
; Second exponential-style evaluation, on
; v80 = (v70 + v80) * (v80 - v70) + v85, where v85 = scaled numerator/denominator.
v_subrev_f32_e32 v83, v70, v80
v_mul_f32_e32 v85, v71, v89
v_add_f32_e32 v80, v70, v80
v_mad_f32 v80, v80, v83, v85
v_cmp_gt_f32_e32 vcc, 0, v80
v_cndmask_b32_e64 v83, 0.5, -0.5, vcc
v_mac_f32_e32 v83, v86, v80
v_cvt_i32_f32_e32 v83, v83
v_madak_f32_e32 v71, v89, v71, 0x3f58560b
v_cvt_f32_i32_e32 v86, v83
v_lshlrev_b32_e32 v83, 23, v83
v_mad_f32 v90, v91, v86, v80
v_mad_f32 v91, v93, v86, v90
v_mul_f32_e32 v92, v91, v91
v_mac_f32_e32 v96, v97, v92
v_mac_f32_e32 v99, v96, v92
v_mac_f32_e32 v100, v99, v92
v_mac_f32_e32 v101, v100, v92
v_mad_f32 v92, -v92, v101, v91
v_mul_f32_e32 v91, v92, v91
v_sub_f32_e32 v92, 2.0, v92
v_cmp_gt_f32_e64 vcc, |v92|, v87
v_cndmask_b32_e32 v94, 1.0, v88, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_mul_f32_e32 v91, v92, v91
v_mul_f32_e32 v91, v91, v94
v_mad_f32 v86, -v86, v93, -v91
; Range handling for both evaluations: result is 0 when the argument is
; below 0xc2aeac4f (~-87.3), 0x7f800000 (+inf) when it is not below
; 0x42b17218 (~88.7), and the argument itself when it is NaN.
v_mov_b32_e32 v91, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v84, v91
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v76, 0, v76, vcc
v_cmp_lt_f32_e32 vcc, v84, v92
v_mov_b32_e32 v93, 0x7f800000
v_cndmask_b32_e32 v76, v93, v76, vcc
v_cmp_u_f32_e32 vcc, v84, v84
v_cndmask_b32_e32 v76, v76, v84, vcc
v_subrev_f32_e32 v84, v90, v86
v_sub_f32_e32 v84, 1.0, v84
v_add_i32_e32 v83, vcc, v84, v83
v_cmp_ge_f32_e32 vcc, v80, v91
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e64 s[10:11], v80, v92
v_cndmask_b32_e64 v83, v93, v83, s[10:11]
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v83, v80, vcc
v_mul_f32_e32 v76, v80, v76 ; product of the two exponential results
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v80, v70 ; s[10:11] = (v70 < 6.0)
v_mov_b32_e32 v80, 0x31800000
v_cmp_gt_f32_e64 vcc, |v70|, v87
v_cmp_gt_f32_e64 s[12:13], v80, v70 ; s[12:13] = (v70 < 2^-28)
v_cndmask_b32_e32 v80, 1.0, v88, vcc
v_mul_f32_e32 v70, v80, v70
v_rcp_f32_e32 v70, v70
v_cmp_u_f32_e32 vcc, v45, v45 ; vcc = v45 is NaN
v_mul_f32_e32 v70, v70, v76
v_mad_f32 v70, -v80, v70, 1.0
; Select the per-interval result, OR in the bits of v45 selected by s51,
; then override for the two smallest intervals and for NaN input.
v_cndmask_b32_e64 v70, 1.0, v70, s[10:11]
v_cndmask_b32_e64 v70, v70, v71, s[4:5]
v_and_b32_e32 v71, s51, v45
v_or_b32_e32 v70, v71, v70
v_mad_f32 v71, v85, v45, v45
v_cndmask_b32_e64 v70, v70, v71, s[8:9]
v_mul_f32_e32 v71, 0x3f8375d4, v45
v_mac_f32_e32 v71, 0x41000000, v45
v_mul_f32_e32 v71, 0x3e000000, v71
v_cndmask_b32_e64 v70, v70, v71, s[12:13]
v_cndmask_b32_e32 v45, v70, v45, vcc
; Combine and accumulate.
v_subrev_f32_e32 v45, v45, v78 ; v45 = v78 - v45
v_mul_f32_e32 v70, s19, v78
v_mad_f32 v45, v77, v45, -v70
v_mul_f32_e32 v70, v79, v81
v_mac_f32_e32 v5, v45, v51 ; v5 += v45 * v51
v_mul_f32_e32 v45, v82, v70
v_mad_f32 v45, v51, v69, -v45
v_mul_f32_e32 v51, v69, v51
v_mad_f32 v51, v70, v82, -v51
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
v_mul_f32_e64 v78, v45, -v35
v_mul_f32_e64 v71, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB6_98: ; %Flow1231
; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes that failed the v45 < s22 * m test in BB6_96.
s_or_b64 exec, exec, s[38:39]
BB6_99: ; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes saved in s[34:35], then keep active only the lanes
; where bit 17 of v65 is set; if none are, skip to BB6_103.
s_or_b64 exec, exec, s[34:35]
v_lshrrev_b32_e32 v27, 17, v65
v_and_b32_e32 v27, 1, v27 ; v27 = bit 17 of v65
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[34:35], exec, s[4:5] ; s[34:35] = lanes switched off (restored at BB6_103)
; mask branch BB6_103
s_cbranch_execz BB6_103
BB6_100: ; in Loop: Header=BB6_11 Depth=1
; Read four dwords v[79:82] from DS at v55 (8-byte slots 16 and 17), form
; the component differences against v72..v74 and their squared length
; v45, and enter BB6_101 only for lanes where v45 < s22 * m, with
; m = 1.0 or 0 taken from the scalar mask built below.
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
; Mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v64 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v73, v80 ; v31 = v80 - v73
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79 ; v27 = v79 - v72
v_mul_f32_e32 v45, v31, v31
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5] ; m = mask ? 1.0 : 0
v_subrev_f32_e32 v35, v74, v81 ; v35 = v81 - v74
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35 ; v45 = v31^2 + v27^2 + v35^2
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[38:39], exec, s[4:5] ; s[38:39] = lanes switched off (restored at BB6_102)
; mask branch BB6_102
s_cbranch_execz BB6_102
BB6_101: ; in Loop: Header=BB6_11 Depth=1
; Per-lane arithmetic for the pair selected in BB6_100. Inputs used here:
; v45 (squared length), v27/v31/v35 (component differences), v67/v68 and
; v75 (loaded in BB6_95), v82 (fourth dword read in BB6_100), v9, v37,
; v19 and scalars s18, s19, s23, s26, s27, s50, s51.
; Results: v8 and v5 are accumulated; v69/v71/v78 are incremented by
; v51 * (v27, v31, v35) and v42/v43/v44 by v45 * (v27, v31, v35).
; NOTE(review): this looks like a Lennard-Jones plus Ewald-corrected
; Coulomb force/energy evaluation - confirm against the kernel source.
v_max_f32_e32 v45, 0x34cd15ae, v45 ; clamp v45 from below (~3.8e-7)
; Two even/odd polynomials in v80 = v9 * v45: v82 and v83 (used as v83 / v82 below).
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
v_mul_f32_e32 v51, v75, v82 ; v82 is consumed here before being reused below
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_rsq_f32_e32 v79, v45 ; v79 = 1/sqrt(v45)
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 17, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:64
; v70 = 1.0 if bit 17 of v19 is set, else 0.
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
v_mul_f32_e32 v80, v79, v79 ; v80 = v79^2
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81 ; v84 = v70 * v80^3
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81 ; * -1/6
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77 ; + (~1/12) * v77
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76 ; v8 += v70 * v76
v_mul_f32_e32 v45, v79, v45 ; v45 = s18 * v45 * rsq(v45)
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77 ; v77 = v37 * v83 / v82
v_mac_f32_e32 v77, v79, v76
; Branch-free piecewise evaluation of a function of v45; the result
; replaces v45 at the final v_cndmask chain. v76 = v45 & s27 is the
; argument tested against 1.25 (s[4:5]), 0.84375 (s[8:9]), ~2.857 (vcc),
; 6.0 (s[10:11]) and 2^-28 (s[12:13]).
; NOTE(review): thresholds and constants (e.g. 0x3f58560b) match an
; fdlibm-style erf(); s27/s50/s51 are presumably abs/truncation/sign
; masks - confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9] ; v81 = polynomial argument for the selected interval
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0 ; denominator: 1 + v81 * (selected polynomial)
; First exponential-style evaluation, on v91 = -(v90*v90) - 0.5625 with
; v90 = v45 & s50: round v91 * log2(e) to an integer (v92/v86), reduce
; with the two-part constant 0xbf317180 / 0xb717f7d1, evaluate a
; polynomial, and add the integer into the exponent field (shift by 23).
; Divisions are done as rcp with pre/post scaling: a divisor with
; magnitude above 0x6f800000 (2^96) is multiplied by 0x2f800000 (2^-32)
; before v_rcp and the quotient is scaled by the same factor afterwards.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86 ; integer add into the exponent bits
; Range handling: result is 0 when the argument is below 0xc2aeac4f
; (~-87.3), 0x7f800000 (+inf) when it is not below 0x42b17218 (~88.7),
; and the argument itself when it is NaN.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential-style evaluation, on
; v90 = (v76 + v90) * (v90 - v76) + v96, where v96 = scaled numerator/denominator.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83 ; product of the two exponential results
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76 ; s[10:11] = (v76 < 6.0)
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76 ; s[12:13] = (v76 < 2^-28)
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45 ; vcc = v45 is NaN
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
; Select the per-interval result, OR in the bits of v45 selected by s51,
; then override for the two smallest intervals and for NaN input.
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; Combine and accumulate.
v_subrev_f32_e32 v45, v45, v70 ; v45 = v70 - v45
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51 ; v5 += v45 * v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB6_102: ; %Flow1230
; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes that failed the v45 < s22 * m test in BB6_100.
s_or_b64 exec, exec, s[38:39]
BB6_103: ; in Loop: Header=BB6_11 Depth=1
; Rejoin the lanes saved in s[34:35], then keep active only the lanes
; where bit 18 of v65 is set; if none are, skip to BB6_107.
s_or_b64 exec, exec, s[34:35]
v_lshrrev_b32_e32 v27, 18, v65
v_and_b32_e32 v27, 1, v27 ; v27 = bit 18 of v65
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[34:35], exec, s[4:5] ; s[34:35] = lanes switched off here
; mask branch BB6_107
s_cbranch_execz BB6_107
BB6_104: ; in Loop: Header=BB6_11 Depth=1
; Read four dwords v[79:82] from DS at v55 (8-byte slots 32 and 33), form
; the component differences against v72..v74 and their squared length
; v45, and enter BB6_105 only for lanes where v45 < s22 * m, with
; m = 1.0 or 0 taken from the scalar mask built below.
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; Mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v63 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v73, v80 ; v31 = v80 - v73
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79 ; v27 = v79 - v72
v_mul_f32_e32 v45, v31, v31
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5] ; m = mask ? 1.0 : 0
v_subrev_f32_e32 v35, v74, v81 ; v35 = v81 - v74
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35 ; v45 = v31^2 + v27^2 + v35^2
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc ; s[4:5] = old exec; exec &= vcc
s_xor_b64 s[38:39], exec, s[4:5] ; s[38:39] = lanes switched off here
; mask branch BB6_106
s_cbranch_execz BB6_106
BB6_105: ; in Loop: Header=BB6_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 18, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:128
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB6_106: ; %Flow1229
; in Loop: Header=BB6_11 Depth=1
; Join point of an inner masked branch: OR the lane mask held in s[38:39]
; back into exec so those lanes execute again.
; NOTE(review): the instruction that filled s[38:39] for this join lies before
; this label -- presumably an s_and_saveexec_b64 / s_xor_b64 pair; confirm.
s_or_b64 exec, exec, s[38:39]
BB6_107: ; in Loop: Header=BB6_11 Depth=1
; Outer join: re-enable the lanes recorded in s[34:35].
s_or_b64 exec, exec, s[34:35]
; vcc = (bit 19 of v65 is set), computed per lane as ((v65 >> 19) & 1) == 1.
; NOTE(review): v65 looks like a per-lane interaction bit mask -- confirm
; against the kernel source.
v_lshrrev_b32_e32 v27, 19, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = old exec XOR new exec, i.e. the
; lanes switched off here, which the join at BB6_111 ORs back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_111
; Skip the guarded region entirely when no lane is left active.
s_cbranch_execz BB6_111
BB6_108: ; in Loop: Header=BB6_11 Depth=1
; Fetch four dwords from LDS, form three differences and their sum of squares
; (v45), then keep only the lanes where v45 is below a per-lane threshold.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup the backend emits
; before DS instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
; Two consecutive 64-bit LDS reads (offset0/offset1 count 8-byte units, i.e.
; byte offsets 384 and 392 from v55) -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; Lane predicate, built on the scalar unit while the LDS read is in flight:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v62 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before the first use of v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73, v27 = v79 - v72, v35 = v81 - v74.
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79
v_mul_f32_e32 v45, v31, v31
; v51 = predicate ? 1.0 : 0, scaled by s22 below; a zero threshold rejects the
; lane because v45 (a sum of squares) is never below zero.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v74, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; NOTE(review): s22 is presumably a squared cut-off distance -- confirm.
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] records the lanes switched off
; here so that BB6_110 can restore them.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_110
s_cbranch_execz BB6_110
BB6_109: ; in Loop: Header=BB6_11 Depth=1
; Pair-interaction body. Runs only in lanes that passed the v45 < v51 test in
; the block above (exec was narrowed by s_and_saveexec_b64). On entry v45 is
; the sum of squares of v27/v31/v35 and v82 is the fourth dword fetched by
; ds_read2_b64. Accumulates into v8, v5, v78/v71/v69 and v30/v29/v28.
; NOTE(review): given the GROMACS log this dump comes from, this is presumably
; one unrolled i-atom step of the OpenCL nonbonded kernel (LJ + Ewald
; electrostatics with energy output) -- confirm against the kernel source.
;
; Clamp v45 from below (0x34cd15ae ~= 3.8e-7f) so the v_rsq_f32 input stays
; positive.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9 * v45 and v76 = v80^2 feed two polynomials evaluated by the
; interleaved v_madak/v_mac chains below: v82 (constant term 1.0) and v83.
; v82 is later inverted with v_rcp_f32 and multiplied by v83 and v37.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * (fourth LDS dword); read here because v82 is reused as a
; polynomial accumulator by the very next instruction.
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; Fetch a 64-bit pair from LDS at v56+192 into v76:v77 (consumed after the
; s_waitcnt below) and turn bit 19 of v19 into a 0.0/1.0 factor in v70.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup emitted before DS
; instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 19, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:192
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 (reciprocal of the clamped v45); v81 = v70 * v80^2;
; v84 = v70 * v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
; LDS pair (lo = v76, hi = v77) is needed from here on.
s_waitcnt lgkmcnt(0)
; v85 = v67*lo - v84*(v68*hi).
; v8 += v70 * ( (v67*lo) * (-1/6) * (v70*v80^3 + s23)
;             + (1/12) * (v68*hi) * (v84^2 + s26) )
; (0xbe2aaaab = -1/6, 0x3daaaaaa ~= 1/12).
; NOTE(review): presumably lo/hi are per-atom LJ parameters and s23/s26
; potential-shift constants -- confirm against the kernel source.
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 + v79 * (v70 * v80); it multiplies v51 in the final
; accumulation. Interleaved with x = v45 = s18 * v45 * v79, the argument of
; the function evaluated below.
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- inlined special function f(x), x = v45 ------------------------------
; The range thresholds (0.84375, 1.25, 1/0.35, 6.0, 2^-28) and result forms
; match an fdlibm-style single-precision erf().
; NOTE(review): inferred from the constants; s27, s50 and s51 are presumably
; the masks 0x7fffffff, 0xfffff000 and 0x80000000 -- they are set outside
; this view, confirm.
; v76 = x & s27; s[4:5] = (v76 < 1.25); s[8:9] = (v76 < 0.84375);
; vcc = (v76 < 0x4036db6e ~= 2.857).
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
; Polynomial variable v81: v76^2 below 0.84375, v76 - 1 below 1.25,
; otherwise 1 / v76^2.
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
; v87 = 2^96 and (later) v88 = 2^-32 implement range-safe reciprocals: a
; divisor whose magnitude exceeds 2^96 is pre-scaled by 2^-32 and the quotient
; is multiplied by the same scale afterwards.
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Numerator candidates accumulate in v82 and denominator candidates in v83,
; with v86 as the alternate chain; each pair is evaluated for every range and
; picked with v_cndmask on vcc, s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator Q = 1 + v81 * (selected polynomial).
v_mad_f32 v83, v81, v83, 1.0
; First exponential argument: v91 = -0.5625 - v90^2, with v90 = x & s50.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; exp(v91): v92 = int(v91 * log2(e) +/- 0.5), the +/-0.5 taking the sign of
; v91 (0x3fb8aa3b = log2(e)).
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
; v81 = P / Q through the scaled reciprocal (scale factor kept in v89).
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Argument reduction: subtract v86 * ln2 in two parts (0xbf317180 and
; 0xb717f7d1 are the negated high/low parts of ln 2).
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
; Polynomial in v96 = v95^2 with coefficients v98, v97, v100, v101, v102,
; followed by a scaled-reciprocal quotient with divisor 2 - v96.
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; Apply 2^v92 by adding v92 << 23 to the float's bit pattern (integer add into
; the exponent field).
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range fix-ups: 0 when v91 < 0xc2aeac4f (~ -87.3), +inf (0x7f800000) when
; v91 >= 0x42b17218 (~ 88.7), and NaN input passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v90 - v76) * (v90 + v76) + P/Q.
; v96 = P/Q with the reciprocal scale undone.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; Result for the 0.84375 <= v76 < 1.25 range: 0x3f58560b (~0.845) + P/Q.
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
; Same exp polynomial as above; the coefficient registers v97/v100/v101/v102
; are consumed in place here (v_mac overwrites them).
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28).
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
; Large-argument result: 1 - v82 / v76 (scaled reciprocal), or exactly 1.0
; once v76 >= 6.0.
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = x is NaN (used by the last select below).
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
; OR the s51-masked bits of x back into the magnitude result.
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Small-argument result: x + x * P/Q.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
; Tiny-argument result: 0.125 * (8*x + 0x3f8375d4 * x).
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
; v45 = f(x); a NaN x is returned unchanged.
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- accumulation ----------------------------------------------------------
; v5 += v51 * ( v79 * (v70 - f(x)) - s19 * v70 )
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v77 - v85*(v80*v84); v51 is built from the same two products
; subtracted in the opposite order, i.e. the negation of v45.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; (v78, v71, v69) += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v30, v29, v28) += (v35, v31, v27) * v45
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
BB6_110: ; %Flow1228
; in Loop: Header=BB6_11 Depth=1
; Join of the v45 < v51 branch: s[38:39] holds the lanes that were switched
; off by the s_and_saveexec_b64 / s_xor_b64 pair at the end of BB6_108; OR
; them back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_111: ; in Loop: Header=BB6_11 Depth=1
; Outer join: re-enable the lanes recorded in s[34:35] by BB6_107.
s_or_b64 exec, exec, s[34:35]
; vcc = (bit 20 of v65 is set), computed per lane as ((v65 >> 20) & 1) == 1.
; NOTE(review): v65 looks like a per-lane interaction bit mask -- confirm
; against the kernel source.
v_lshrrev_b32_e32 v27, 20, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = old exec XOR new exec, i.e. the
; lanes switched off here, which the join at BB6_115 ORs back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_115
; Skip the guarded region entirely when no lane is left active.
s_cbranch_execz BB6_115
BB6_112: ; in Loop: Header=BB6_11 Depth=1
; Fetch four dwords from LDS, form three differences and their sum of squares
; (v45), then keep only the lanes where v45 is below a per-lane threshold.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup the backend emits
; before DS instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
; Two consecutive 64-bit LDS reads (offset0/offset1 count 8-byte units, i.e.
; byte offsets 512 and 520 from v55) -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; Lane predicate, built on the scalar unit while the LDS read is in flight:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v61 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before the first use of v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73, v27 = v79 - v72, v35 = v81 - v74.
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79
v_mul_f32_e32 v45, v31, v31
; v51 = predicate ? 1.0 : 0, scaled by s22 below; a zero threshold rejects the
; lane because v45 (a sum of squares) is never below zero.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v74, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; NOTE(review): s22 is presumably a squared cut-off distance -- confirm.
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] records the lanes switched off
; here so that BB6_114 can restore them.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_114
s_cbranch_execz BB6_114
BB6_113: ; in Loop: Header=BB6_11 Depth=1
; Pair-interaction body. Runs only in lanes that passed the v45 < v51 test in
; the block above (exec was narrowed by s_and_saveexec_b64). On entry v45 is
; the sum of squares of v27/v31/v35 and v82 is the fourth dword fetched by
; ds_read2_b64. Accumulates into v8, v5, v78/v71/v69 and v26/v25/v24.
; NOTE(review): given the GROMACS log this dump comes from, this is presumably
; one unrolled i-atom step of the OpenCL nonbonded kernel (LJ + Ewald
; electrostatics with energy output) -- confirm against the kernel source.
;
; Clamp v45 from below (0x34cd15ae ~= 3.8e-7f) so the v_rsq_f32 input stays
; positive.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9 * v45 and v76 = v80^2 feed two polynomials evaluated by the
; interleaved v_madak/v_mac chains below: v82 (constant term 1.0) and v83.
; v82 is later inverted with v_rcp_f32 and multiplied by v83 and v37.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * (fourth LDS dword); read here because v82 is reused as a
; polynomial accumulator by the very next instruction.
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; Fetch a 64-bit pair from LDS at v56+256 into v76:v77 (consumed after the
; s_waitcnt below) and turn bit 20 of v19 into a 0.0/1.0 factor in v70.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup emitted before DS
; instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 20, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:256
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 (reciprocal of the clamped v45); v81 = v70 * v80^2;
; v84 = v70 * v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
; LDS pair (lo = v76, hi = v77) is needed from here on.
s_waitcnt lgkmcnt(0)
; v85 = v67*lo - v84*(v68*hi).
; v8 += v70 * ( (v67*lo) * (-1/6) * (v70*v80^3 + s23)
;             + (1/12) * (v68*hi) * (v84^2 + s26) )
; (0xbe2aaaab = -1/6, 0x3daaaaaa ~= 1/12).
; NOTE(review): presumably lo/hi are per-atom LJ parameters and s23/s26
; potential-shift constants -- confirm against the kernel source.
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 + v79 * (v70 * v80); it multiplies v51 in the final
; accumulation. Interleaved with x = v45 = s18 * v45 * v79, the argument of
; the function evaluated below.
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- inlined special function f(x), x = v45 ------------------------------
; The range thresholds (0.84375, 1.25, 1/0.35, 6.0, 2^-28) and result forms
; match an fdlibm-style single-precision erf().
; NOTE(review): inferred from the constants; s27, s50 and s51 are presumably
; the masks 0x7fffffff, 0xfffff000 and 0x80000000 -- they are set outside
; this view, confirm.
; v76 = x & s27; s[4:5] = (v76 < 1.25); s[8:9] = (v76 < 0.84375);
; vcc = (v76 < 0x4036db6e ~= 2.857).
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
; Polynomial variable v81: v76^2 below 0.84375, v76 - 1 below 1.25,
; otherwise 1 / v76^2.
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
; v87 = 2^96 and (later) v88 = 2^-32 implement range-safe reciprocals: a
; divisor whose magnitude exceeds 2^96 is pre-scaled by 2^-32 and the quotient
; is multiplied by the same scale afterwards.
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Numerator candidates accumulate in v82 and denominator candidates in v83,
; with v86 as the alternate chain; each pair is evaluated for every range and
; picked with v_cndmask on vcc, s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator Q = 1 + v81 * (selected polynomial).
v_mad_f32 v83, v81, v83, 1.0
; First exponential argument: v91 = -0.5625 - v90^2, with v90 = x & s50.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; exp(v91): v92 = int(v91 * log2(e) +/- 0.5), the +/-0.5 taking the sign of
; v91 (0x3fb8aa3b = log2(e)).
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
; v81 = P / Q through the scaled reciprocal (scale factor kept in v89).
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Argument reduction: subtract v86 * ln2 in two parts (0xbf317180 and
; 0xb717f7d1 are the negated high/low parts of ln 2).
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
; Polynomial in v96 = v95^2 with coefficients v98, v97, v100, v101, v102,
; followed by a scaled-reciprocal quotient with divisor 2 - v96.
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; Apply 2^v92 by adding v92 << 23 to the float's bit pattern (integer add into
; the exponent field).
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range fix-ups: 0 when v91 < 0xc2aeac4f (~ -87.3), +inf (0x7f800000) when
; v91 >= 0x42b17218 (~ 88.7), and NaN input passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v90 - v76) * (v90 + v76) + P/Q.
; v96 = P/Q with the reciprocal scale undone.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; Result for the 0.84375 <= v76 < 1.25 range: 0x3f58560b (~0.845) + P/Q.
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
; Same exp polynomial as above; the coefficient registers v97/v100/v101/v102
; are consumed in place here (v_mac overwrites them).
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28).
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
; Large-argument result: 1 - v82 / v76 (scaled reciprocal), or exactly 1.0
; once v76 >= 6.0.
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = x is NaN (used by the last select below).
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
; OR the s51-masked bits of x back into the magnitude result.
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Small-argument result: x + x * P/Q.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
; Tiny-argument result: 0.125 * (8*x + 0x3f8375d4 * x).
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
; v45 = f(x); a NaN x is returned unchanged.
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- accumulation ----------------------------------------------------------
; v5 += v51 * ( v79 * (v70 - f(x)) - s19 * v70 )
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v77 - v85*(v80*v84); v51 is built from the same two products
; subtracted in the opposite order, i.e. the negation of v45.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; (v78, v71, v69) += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v26, v25, v24) += (v35, v31, v27) * v45
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
BB6_114: ; %Flow1227
; in Loop: Header=BB6_11 Depth=1
; Join of the v45 < v51 branch: s[38:39] holds the lanes that were switched
; off by the s_and_saveexec_b64 / s_xor_b64 pair at the end of BB6_112; OR
; them back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_115: ; in Loop: Header=BB6_11 Depth=1
; Outer join: re-enable the lanes recorded in s[34:35] by BB6_111.
s_or_b64 exec, exec, s[34:35]
; vcc = (bit 21 of v65 is set), computed per lane as ((v65 >> 21) & 1) == 1.
; NOTE(review): v65 looks like a per-lane interaction bit mask -- confirm
; against the kernel source.
v_lshrrev_b32_e32 v27, 21, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = old exec XOR new exec, i.e. the
; lanes switched off here, which the join at BB6_119 ORs back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_119
; Skip the guarded region entirely when no lane is left active.
s_cbranch_execz BB6_119
BB6_116: ; in Loop: Header=BB6_11 Depth=1
; Fetch four dwords from LDS, form three differences and their sum of squares
; (v45), then keep only the lanes where v45 is below a per-lane threshold.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup the backend emits
; before DS instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
; Two consecutive 64-bit LDS reads (offset0/offset1 count 8-byte units, i.e.
; byte offsets 640 and 648 from v55) -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; Lane predicate, built on the scalar unit while the LDS read is in flight:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v60 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before the first use of v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73, v27 = v79 - v72, v35 = v81 - v74.
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79
v_mul_f32_e32 v45, v31, v31
; v51 = predicate ? 1.0 : 0, scaled by s22 below; a zero threshold rejects the
; lane because v45 (a sum of squares) is never below zero.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v74, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; NOTE(review): s22 is presumably a squared cut-off distance -- confirm.
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] records the lanes switched off
; here so that BB6_118 can restore them.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_118
s_cbranch_execz BB6_118
BB6_117: ; in Loop: Header=BB6_11 Depth=1
; Pair-interaction body. Runs only in lanes that passed the v45 < v51 test in
; the block above (exec was narrowed by s_and_saveexec_b64). On entry v45 is
; the sum of squares of v27/v31/v35 and v82 is the fourth dword fetched by
; ds_read2_b64. Accumulates into v8, v5, v78/v71/v69 and v22/v21/v20.
; NOTE(review): given the GROMACS log this dump comes from, this is presumably
; one unrolled i-atom step of the OpenCL nonbonded kernel (LJ + Ewald
; electrostatics with energy output) -- confirm against the kernel source.
;
; Clamp v45 from below (0x34cd15ae ~= 3.8e-7f) so the v_rsq_f32 input stays
; positive.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9 * v45 and v76 = v80^2 feed two polynomials evaluated by the
; interleaved v_madak/v_mac chains below: v82 (constant term 1.0) and v83.
; v82 is later inverted with v_rcp_f32 and multiplied by v83 and v37.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * (fourth LDS dword); read here because v82 is reused as a
; polynomial accumulator by the very next instruction.
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; Fetch a 64-bit pair from LDS at v56+320 into v76:v77 (consumed after the
; s_waitcnt below) and turn bit 21 of v19 into a 0.0/1.0 factor in v70.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup emitted before DS
; instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 21, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:320
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 (reciprocal of the clamped v45); v81 = v70 * v80^2;
; v84 = v70 * v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
; LDS pair (lo = v76, hi = v77) is needed from here on.
s_waitcnt lgkmcnt(0)
; v85 = v67*lo - v84*(v68*hi).
; v8 += v70 * ( (v67*lo) * (-1/6) * (v70*v80^3 + s23)
;             + (1/12) * (v68*hi) * (v84^2 + s26) )
; (0xbe2aaaab = -1/6, 0x3daaaaaa ~= 1/12).
; NOTE(review): presumably lo/hi are per-atom LJ parameters and s23/s26
; potential-shift constants -- confirm against the kernel source.
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = v37 * v83 / v82 + v79 * (v70 * v80); it multiplies v51 in the final
; accumulation. Interleaved with x = v45 = s18 * v45 * v79, the argument of
; the function evaluated below.
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- inlined special function f(x), x = v45 ------------------------------
; The range thresholds (0.84375, 1.25, 1/0.35, 6.0, 2^-28) and result forms
; match an fdlibm-style single-precision erf().
; NOTE(review): inferred from the constants; s27, s50 and s51 are presumably
; the masks 0x7fffffff, 0xfffff000 and 0x80000000 -- they are set outside
; this view, confirm.
; v76 = x & s27; s[4:5] = (v76 < 1.25); s[8:9] = (v76 < 0.84375);
; vcc = (v76 < 0x4036db6e ~= 2.857).
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
; Polynomial variable v81: v76^2 below 0.84375, v76 - 1 below 1.25,
; otherwise 1 / v76^2.
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
; v87 = 2^96 and (later) v88 = 2^-32 implement range-safe reciprocals: a
; divisor whose magnitude exceeds 2^96 is pre-scaled by 2^-32 and the quotient
; is multiplied by the same scale afterwards.
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Numerator candidates accumulate in v82 and denominator candidates in v83,
; with v86 as the alternate chain; each pair is evaluated for every range and
; picked with v_cndmask on vcc, s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator Q = 1 + v81 * (selected polynomial).
v_mad_f32 v83, v81, v83, 1.0
; First exponential argument: v91 = -0.5625 - v90^2, with v90 = x & s50.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; exp(v91): v92 = int(v91 * log2(e) +/- 0.5), the +/-0.5 taking the sign of
; v91 (0x3fb8aa3b = log2(e)).
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
; v81 = P / Q through the scaled reciprocal (scale factor kept in v89).
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Argument reduction: subtract v86 * ln2 in two parts (0xbf317180 and
; 0xb717f7d1 are the negated high/low parts of ln 2).
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
; Polynomial in v96 = v95^2 with coefficients v98, v97, v100, v101, v102,
; followed by a scaled-reciprocal quotient with divisor 2 - v96.
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; Apply 2^v92 by adding v92 << 23 to the float's bit pattern (integer add into
; the exponent field).
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range fix-ups: 0 when v91 < 0xc2aeac4f (~ -87.3), +inf (0x7f800000) when
; v91 >= 0x42b17218 (~ 88.7), and NaN input passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v90 - v76) * (v90 + v76) + P/Q.
; v96 = P/Q with the reciprocal scale undone.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; Result for the 0.84375 <= v76 < 1.25 range: 0x3f58560b (~0.845) + P/Q.
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
; Same exp polynomial as above; the coefficient registers v97/v100/v101/v102
; are consumed in place here (v_mac overwrites them).
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28).
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
; Large-argument result: 1 - v82 / v76 (scaled reciprocal), or exactly 1.0
; once v76 >= 6.0.
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = x is NaN (used by the last select below).
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
; OR the s51-masked bits of x back into the magnitude result.
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Small-argument result: x + x * P/Q.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
; Tiny-argument result: 0.125 * (8*x + 0x3f8375d4 * x).
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
; v45 = f(x); a NaN x is returned unchanged.
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- accumulation ----------------------------------------------------------
; v5 += v51 * ( v79 * (v70 - f(x)) - s19 * v70 )
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v77 - v85*(v80*v84); v51 is built from the same two products
; subtracted in the opposite order, i.e. the negation of v45.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; (v78, v71, v69) += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v22, v21, v20) += (v35, v31, v27) * v45
v_mad_f32 v22, v35, v45, v22
v_mad_f32 v21, v31, v45, v21
v_mac_f32_e32 v20, v27, v45
BB6_118: ; %Flow1226
; in Loop: Header=BB6_11 Depth=1
; Join of the v45 < v51 branch: s[38:39] holds the lanes that were switched
; off by the s_and_saveexec_b64 / s_xor_b64 pair at the end of BB6_116; OR
; them back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_119: ; in Loop: Header=BB6_11 Depth=1
; Outer join: re-enable the lanes recorded in s[34:35] by BB6_115.
s_or_b64 exec, exec, s[34:35]
; vcc = (bit 22 of v65 is set), computed per lane as ((v65 >> 22) & 1) == 1.
; NOTE(review): v65 looks like a per-lane interaction bit mask -- confirm
; against the kernel source.
v_lshrrev_b32_e32 v27, 22, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = old exec XOR new exec, i.e. the
; lanes switched off here, which the join at BB6_123 is expected to OR back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_123
; Skip the guarded region entirely when no lane is left active.
s_cbranch_execz BB6_123
BB6_120: ; in Loop: Header=BB6_11 Depth=1
; Fetch four dwords from LDS, form three differences and their sum of squares
; (v45), then keep only the lanes where v45 is below a per-lane threshold.
; NOTE(review): m0 = -1 is presumably the LDS bounds setup the backend emits
; before DS instructions -- confirm for the target ISA.
s_mov_b32 m0, -1
; Two consecutive 64-bit LDS reads (offset0/offset1 count 8-byte units, i.e.
; byte offsets 768 and 776 from v55) -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:96 offset1:97
; Lane predicate, built on the scalar unit while the LDS read is in flight:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v59 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v59, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before the first use of v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v73, v27 = v79 - v72, v35 = v81 - v74.
v_subrev_f32_e32 v31, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v72, v79
v_mul_f32_e32 v45, v31, v31
; v51 = predicate ? 1.0 : 0, scaled by s22 below; a zero threshold rejects the
; lane because v45 (a sum of squares) is never below zero.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v74, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; NOTE(review): s22 is presumably a squared cut-off distance -- confirm.
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] records the lanes switched off
; here so that the join at BB6_122 can restore them.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_122
s_cbranch_execz BB6_122
; Straight-line arithmetic for the lanes that passed the compare in BB6_120.
; Inputs read here: v45 (squared length), (v27, v31, v35) difference vector,
; v82 (fourth LDS dword), v75, v67, v68, v9, v37, v19, s18, s19, s23, s26,
; s27, s50, s51 and one 64-bit LDS value at v56 + 384.
; Results: v8 and v5 are accumulated into; a scalar v45 and its negation v51
; are multiplied by the difference vector and added to (v16, v17, v18) and
; (v69, v71, v78) respectively.
; NOTE(review): presumably the energy (v8, v5) and force accumulation of one
; atom pair in a non-bonded kernel - confirm against the kernel source.
BB6_121: ; in Loop: Header=BB6_11 Depth=1
; Clamp the squared length from below before taking rsq.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9 * v45, v76 = v80^2; v82 and v83 are built as polynomials in v76/v80
; and later combined as v83 * v37 / v82.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v75 * (fourth dword read from LDS in BB6_120)
v_mul_f32_e32 v51, v75, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1 / sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 22, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
ds_read_b64 v[76:77], v56 offset:384
; v70 = 1.0 if bit 22 of v19 is set, else 0 (used as a multiplicative mask).
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v84 = v70 * v80^3
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_rcp_f32_e32 v77, v82
; v45 = s18 * v45 * v79 after the next two multiplies touching v45.
v_mul_f32_e32 v45, s18, v45
; Masked accumulation into v8.
v_mac_f32_e32 v8, v70, v76
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v70, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; From here to the final select on v45: branch-free piecewise evaluation of a
; function of v45, with range selects held in s[4:5], s[8:9], s[10:11],
; s[12:13] and vcc.
; NOTE(review): the literals (e.g. 0x3f58560b, 0x3f8375d4) look like those of
; the fdlibm single-precision erf(); s27/s50/s51 are presumably bit masks whose
; values are not visible here - confirm.
v_and_b32_e32 v76, s27, v45
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
; Operands with magnitude above 0x6f800000 are pre-scaled by 0x2f800000
; before v_rcp and the scale is re-applied afterwards (same pattern below).
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; The integer add of (v92 << 23) adjusts the float's exponent field directly.
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range handling: 0 below 0xc2aeac4f, 0x7f800000 at or above 0x42b17218,
; and an unordered (NaN) input is passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
v_mul_f32_e32 v82, v82, v83
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
; Final select: an unordered (NaN) v45 is passed through unchanged.
v_cndmask_b32_e32 v45, v76, v45, vcc
; v45 = v79 * (v70 - v45) - s19 * v70, then v5 += v45 * v51.
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v80, v84
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v77 - v85*v70 and v51 = v70*v85 - v77*v51, i.e. v51 = -v45.
v_mul_f32_e32 v45, v85, v70
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v70, v85, -v51
; (v69, v71, v78) += v51 * (v27, v31, v35)
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v16, v17, v18) += v45 * (v27, v31, v35)
v_mad_f32 v18, v35, v45, v18
v_mad_f32 v17, v31, v45, v17
v_mac_f32_e32 v16, v27, v45
BB6_122: ; %Flow1225
; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes that failed the compare in BB6_120.
s_or_b64 exec, exec, s[38:39]
; Join for the lanes parked in s[34:35], then per-lane test of bit 23 of v65.
BB6_123: ; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes recorded in s[34:35].
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 23) & 1
v_lshrrev_b32_e32 v27, 23, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc (previous exec kept in s[4:5]); s[12:13] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB6_127
; Nothing to do if no lane has bit 23 set.
s_cbranch_execz BB6_127
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v23, v27, v31) = (v79 - v72, v80 - v73, v81 - v74), its squared length v35,
; and compares v35 against a per-lane masked copy of s22.
; Note that v23 is used in the compare against v38 first and is then
; overwritten with the first vector component.
; NOTE(review): presumably a squared pair distance tested against a squared
; cut-off; the meaning of s[0:1], s[2:3] and v38 is not visible here.
BB6_124: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 112*8 and v55 + 113*8 (offsets are in 8-byte units).
ds_read2_b64 v[79:82], v55 offset0:112 offset1:113
; s[4:5] = ~(exec & s[0:1]) | (v38 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v38, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0, so v27 = v80 - v73.
v_subrev_f32_e32 v27, v73, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v23, v72, v79
v_mul_f32_e32 v35, v27, v27
; v45 = 1.0 where the mask bit is set, else 0.
v_cndmask_b32_e64 v45, 0, 1.0, s[4:5]
v_subrev_f32_e32 v31, v74, v81
; v35 = v27*v27 + v23*v23 + v31*v31
v_mac_f32_e32 v35, v23, v23
v_mac_f32_e32 v35, v31, v31
; Threshold is s22 for masked-in lanes and 0 otherwise.
v_mul_f32_e32 v45, s22, v45
v_cmp_lt_f32_e32 vcc, v35, v45
; exec &= vcc; s[34:35] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_126
s_cbranch_execz BB6_126
; Straight-line arithmetic for the lanes that passed the compare in BB6_124.
; Inputs read here: v35 (squared length), (v23, v27, v31) difference vector,
; v82 (fourth LDS dword), v75, v67, v68, v9, v37, v19, s18, s19, s23, s26,
; s27, s50, s51 and one 64-bit LDS value at v56 + 448.
; v67, v68 and v72..v75 are reused as temporaries once their incoming values
; have been consumed.
; Results: v8 and v5 are accumulated into; a scalar v35 and its negation v45
; are multiplied by the difference vector and added to (v13, v14, v15) and
; (v69, v71, v78) respectively.
; NOTE(review): presumably the energy (v8, v5) and force accumulation of one
; atom pair in a non-bonded kernel - confirm against the kernel source.
BB6_125: ; in Loop: Header=BB6_11 Depth=1
; v70 = 1.0 if bit 23 of v19 is set, else 0 (used as a multiplicative mask).
v_lshrrev_b32_e32 v51, 23, v19
v_and_b32_e32 v51, 1, v51
; Clamp the squared length from below before taking rsq.
v_max_f32_e32 v35, 0x34cd15ae, v35
v_cmp_eq_u32_e32 vcc, 1, v51
; v51 = 1 / sqrt(v35)
v_rsq_f32_e32 v51, v35
s_mov_b32 m0, -1
v_cndmask_b32_e64 v70, 0, 1.0, vcc
; v45 = v75 * (fourth dword read from LDS in BB6_124)
v_mul_f32_e32 v45, v75, v82
; v74 = v51^2, v73 (set below) = v70 * v74^3
v_mul_f32_e32 v74, v51, v51
v_mul_f32_e32 v72, v74, v74
v_mul_f32_e32 v75, v70, v72
ds_read_b64 v[72:73], v56 offset:448
; v77 = v9 * v35, v79 = v77^2; v80 and (later) v79 are built as polynomials
; and combined as v79 * v37 / v80.
v_mul_f32_e32 v77, v9, v35
v_mul_f32_e32 v79, v77, v77
v_mov_b32_e32 v80, 0x3a92b707
v_madak_f32_e32 v80, v80, v79, 0x3ded3cb2
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v68, v68, v73
v_mul_f32_e32 v73, v74, v75
v_mul_f32_e32 v76, v73, v68
v_mad_f32 v76, v72, v67, -v76
v_mul_f32_e32 v67, v67, v72
v_mad_f32 v72, v73, v73, s26
v_mul_f32_e32 v68, v72, v68
v_mad_f32 v72, v75, v74, s23
v_mov_b32_e32 v81, 0x3c739487
v_mul_f32_e32 v72, 0xbe2aaaab, v72
v_mul_f32_e32 v67, v67, v72
v_madak_f32_e32 v81, v81, v79, 0x3f01e2bc
v_mad_f32 v80, v80, v79, 1.0
v_mac_f32_e32 v67, 0x3daaaaaa, v68
v_mac_f32_e32 v80, v77, v81
v_mov_b32_e32 v81, 0xb2951928
; Masked accumulation into v8.
v_mac_f32_e32 v8, v70, v67
v_rcp_f32_e32 v67, v80
v_madak_f32_e32 v81, v81, v79, 0xb85ffb93
v_mov_b32_e32 v82, 0x35c55945
v_madak_f32_e32 v82, v82, v79, 0x3a83ca0c
v_madak_f32_e32 v81, v81, v79, 0xbc9ded90
v_madak_f32_e32 v82, v82, v79, 0x3d8eaf3b
v_madak_f32_e32 v79, v81, v79, 0xbf409397
; v35 = s18 * v35 * v51 after the next two multiplies touching v35.
v_mul_f32_e32 v35, s18, v35
v_mac_f32_e32 v79, v77, v82
v_mul_f32_e32 v67, v37, v67
v_mul_f32_e32 v67, v79, v67
v_mul_f32_e32 v68, v70, v74
v_mul_f32_e32 v35, v51, v35
v_mac_f32_e32 v67, v51, v68
; From here to the final select on v35: branch-free piecewise evaluation of a
; function of v35, with range selects held in s[4:5], s[8:9], s[10:11] and vcc.
; NOTE(review): the literals (e.g. 0x3f58560b, 0x3f8375d4) look like those of
; the fdlibm single-precision erf(); s27/s50/s51 are presumably bit masks whose
; values are not visible here - confirm.
v_and_b32_e32 v68, s27, v35
v_mov_b32_e32 v72, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v72, v68
v_mul_f32_e32 v72, v68, v68
v_rcp_f32_e32 v75, v72
v_add_f32_e32 v77, -1.0, v68
v_mov_b32_e32 v79, 0xc11d077e
v_mov_b32_e32 v80, 0xbd777f97
v_cndmask_b32_e64 v75, v75, v77, s[4:5]
v_mov_b32_e32 v77, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v77, v68
v_cndmask_b32_e64 v72, v75, v72, s[8:9]
v_mov_b32_e32 v75, 0xc3f1c275
v_madak_f32_e32 v79, v79, v72, 0xc2a2932b
v_madak_f32_e32 v75, v75, v72, 0xc480230b
v_mov_b32_e32 v77, 0xc1b38712
v_madak_f32_e32 v79, v72, v79, 0xc3389ae7
v_madak_f32_e32 v80, v80, v72, 0x40d23f7c
v_madak_f32_e32 v75, v72, v75, 0xc41f6441
v_madak_f32_e32 v77, v77, v72, 0x43ed43a7
v_madak_f32_e32 v79, v72, v79, 0xc322658c
v_madak_f32_e32 v80, v72, v80, 0x42d9451f
v_madak_f32_e32 v75, v72, v75, 0xc320a2ea
v_madak_f32_e32 v77, v72, v77, 0x451f90ce
v_madak_f32_e32 v79, v72, v79, 0xc2798057
v_madak_f32_e32 v80, v72, v80, 0x43d6810b
v_madak_f32_e32 v75, v72, v75, 0xc18e104b
v_madak_f32_e32 v77, v72, v77, 0x4547fdbb
v_madak_f32_e32 v79, v72, v79, 0xc128f022
v_madak_f32_e32 v80, v72, v80, 0x442158c9
v_madak_f32_e32 v75, v72, v75, 0xbf4c9dd4
v_madak_f32_e32 v77, v72, v77, 0x44c01759
v_madak_f32_e32 v79, v72, v79, 0xbf31a0b7
v_madak_f32_e32 v80, v72, v80, 0x43d9486f
v_mov_b32_e32 v81, 0x4036db6e
v_madak_f32_e32 v77, v72, v77, 0x43a2e571
v_madak_f32_e32 v80, v72, v80, 0x4309a863
v_madak_f32_e32 v75, v72, v75, 0xbc21a092
v_madak_f32_e32 v79, v72, v79, 0xbc21a093
v_cmp_gt_f32_e32 vcc, v81, v68
v_cndmask_b32_e32 v75, v75, v79, vcc
v_mov_b32_e32 v79, 0xbb0df9c0
v_madak_f32_e32 v77, v72, v77, 0x41f2b459
v_madak_f32_e32 v80, v72, v80, 0x419d35ce
v_madak_f32_e32 v79, v79, v72, 0x3d1151b3
v_cndmask_b32_e32 v77, v77, v80, vcc
v_mov_b32_e32 v80, 0x3c445aa3
v_madak_f32_e32 v79, v72, v79, 0xbde31cc2
v_madak_f32_e32 v80, v80, v72, 0x3c5f6e13
v_madak_f32_e32 v79, v72, v79, 0x3ea2fe54
v_madak_f32_e32 v80, v72, v80, 0x3e013307
v_madak_f32_e32 v79, v72, v79, 0xbebe9208
v_madak_f32_e32 v80, v72, v80, 0x3d931ae7
v_madak_f32_e32 v79, v72, v79, 0x3ed46805
v_madak_f32_e32 v80, v72, v80, 0x3f0a5785
v_madak_f32_e32 v79, v72, v79, 0xbb1acdc6
v_madak_f32_e32 v80, v72, v80, 0x3dd9f331
v_cndmask_b32_e64 v75, v75, v79, s[4:5]
v_mov_b32_e32 v79, 0xb7c756b1
v_cndmask_b32_e64 v77, v77, v80, s[4:5]
v_mov_b32_e32 v80, 0xb684e21a
v_madak_f32_e32 v79, v79, v72, 0xbbbd1489
v_madak_f32_e32 v80, v80, v72, 0x390aee49
v_madak_f32_e32 v79, v72, v79, 0xbce9528f
v_madak_f32_e32 v80, v72, v80, 0x3ba68116
v_madak_f32_e32 v79, v72, v79, 0xbea66beb
v_madak_f32_e32 v80, v72, v80, 0x3d852a63
v_madak_f32_e32 v79, v72, v79, 0x3e0375d4
v_madak_f32_e32 v80, v72, v80, 0x3ecbbbce
v_cndmask_b32_e64 v77, v77, v80, s[8:9]
v_cndmask_b32_e64 v75, v75, v79, s[8:9]
v_and_b32_e32 v79, s50, v35
v_mov_b32_e32 v80, 0xbf100000
v_mad_f32 v80, v79, -v79, v80
v_cmp_gt_f32_e64 s[10:11], 0, v80
v_cndmask_b32_e64 v81, 0.5, -0.5, s[10:11]
v_mov_b32_e32 v82, 0x3fb8aa3b
v_mac_f32_e32 v81, v82, v80
v_cvt_i32_f32_e32 v81, v81
v_mov_b32_e32 v86, 0xbf317180
v_mov_b32_e32 v88, 0xb717f7d1
v_mov_b32_e32 v91, 0xb5ddea0e
v_cvt_f32_i32_e32 v85, v81
v_mov_b32_e32 v92, 0x3331bb4c
v_mov_b32_e32 v94, 0x388ab355
v_mov_b32_e32 v95, 0xbb360b61
v_mad_f32 v87, v86, v85, v80
v_mad_f32 v89, v88, v85, v87
v_mul_f32_e32 v90, v89, v89
v_mad_f32 v93, v92, v90, v91
v_mad_f32 v93, v93, v90, v94
v_mad_f32 v93, v93, v90, v95
v_mov_b32_e32 v96, 0x3e2aaaab
v_mad_f32 v93, v93, v90, v96
v_mad_f32 v90, -v90, v93, v89
v_mad_f32 v72, v72, v77, 1.0
; Operands with magnitude above 0x6f800000 are pre-scaled by 0x2f800000
; before v_rcp and the scale is re-applied afterwards (same pattern below).
v_mov_b32_e32 v77, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v72|, v77
v_mov_b32_e32 v83, 0x2f800000
v_sub_f32_e32 v93, 2.0, v90
v_cndmask_b32_e32 v84, 1.0, v83, vcc
v_cmp_gt_f32_e64 vcc, |v93|, v77
v_cndmask_b32_e32 v97, 1.0, v83, vcc
v_mul_f32_e32 v93, v97, v93
v_rcp_f32_e32 v93, v93
v_mul_f32_e32 v72, v84, v72
v_mul_f32_e32 v89, v90, v89
v_rcp_f32_e32 v72, v72
v_mul_f32_e32 v89, v93, v89
v_mul_f32_e32 v89, v89, v97
v_mad_f32 v85, -v85, v88, -v89
v_subrev_f32_e32 v85, v87, v85
v_mul_f32_e32 v72, v72, v75
v_mul_f32_e32 v75, v72, v84
v_subrev_f32_e32 v87, v68, v79
v_add_f32_e32 v79, v68, v79
v_sub_f32_e32 v85, 1.0, v85
; The integer add of (v81 << 23) adjusts the float's exponent field directly.
v_lshlrev_b32_e32 v81, 23, v81
v_add_i32_e32 v81, vcc, v85, v81
v_mad_f32 v79, v79, v87, v75
v_cmp_gt_f32_e32 vcc, 0, v79
v_cndmask_b32_e64 v87, 0.5, -0.5, vcc
v_mac_f32_e32 v87, v82, v79
v_cvt_i32_f32_e32 v82, v87
; Range handling: 0 below 0xc2aeac4f, 0x7f800000 at or above 0x42b17218,
; and an unordered (NaN) input is passed through.
v_mov_b32_e32 v85, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v80, v85
v_mov_b32_e32 v87, 0x42b17218
v_cvt_f32_i32_e32 v89, v82
v_cndmask_b32_e32 v81, 0, v81, vcc
v_cmp_lt_f32_e32 vcc, v80, v87
v_mov_b32_e32 v90, 0x7f800000
v_mad_f32 v86, v86, v89, v79
v_mad_f32 v93, v88, v89, v86
v_mul_f32_e32 v97, v93, v93
v_mac_f32_e32 v91, v92, v97
v_mac_f32_e32 v94, v91, v97
v_mac_f32_e32 v95, v94, v97
v_mac_f32_e32 v96, v95, v97
v_mad_f32 v91, -v97, v96, v93
v_sub_f32_e32 v92, 2.0, v91
v_cndmask_b32_e32 v81, v90, v81, vcc
v_cmp_gt_f32_e64 vcc, |v92|, v77
v_cndmask_b32_e32 v94, 1.0, v83, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v81, v80, vcc
v_mul_f32_e32 v81, v91, v93
v_mul_f32_e32 v81, v92, v81
v_mul_f32_e32 v81, v81, v94
v_mad_f32 v81, -v89, v88, -v81
v_subrev_f32_e32 v81, v86, v81
v_sub_f32_e32 v81, 1.0, v81
v_lshlrev_b32_e32 v82, 23, v82
v_add_i32_e32 v81, vcc, v81, v82
v_cmp_ge_f32_e32 vcc, v79, v85
v_cndmask_b32_e32 v81, 0, v81, vcc
v_cmp_lt_f32_e64 s[10:11], v79, v87
v_cndmask_b32_e64 v81, v90, v81, s[10:11]
v_cmp_u_f32_e32 vcc, v79, v79
v_cndmask_b32_e32 v79, v81, v79, vcc
v_cmp_gt_f32_e64 vcc, |v68|, v77
v_cndmask_b32_e32 v77, 1.0, v83, vcc
v_mul_f32_e32 v81, v77, v68
v_rcp_f32_e32 v81, v81
v_mul_f32_e32 v79, v79, v80
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e32 vcc, v80, v68
v_mov_b32_e32 v80, 0x31800000
v_mul_f32_e32 v79, v81, v79
v_cmp_gt_f32_e64 s[10:11], v80, v68
v_mad_f32 v68, -v77, v79, 1.0
v_cndmask_b32_e32 v68, 1.0, v68, vcc
v_madak_f32_e32 v72, v84, v72, 0x3f58560b
v_cndmask_b32_e64 v68, v68, v72, s[4:5]
v_and_b32_e32 v72, s51, v35
v_or_b32_e32 v68, v72, v68
v_mad_f32 v72, v75, v35, v35
v_cndmask_b32_e64 v68, v68, v72, s[8:9]
v_mul_f32_e32 v72, 0x3f8375d4, v35
v_mac_f32_e32 v72, 0x41000000, v35
v_mul_f32_e32 v72, 0x3e000000, v72
v_cndmask_b32_e64 v68, v68, v72, s[10:11]
; Final select: an unordered (NaN) v35 is passed through unchanged.
v_cmp_u_f32_e32 vcc, v35, v35
v_cndmask_b32_e32 v35, v68, v35, vcc
; v35 = v51 * (v70 - v35) - s19 * v70, then v5 += v35 * v45.
v_subrev_f32_e32 v35, v35, v70
v_mul_f32_e32 v68, s19, v70
v_mad_f32 v35, v51, v35, -v68
v_mul_f32_e32 v51, v74, v73
v_mac_f32_e32 v5, v35, v45
; v35 = v45*v67 - v76*v51 and v45 = v51*v76 - v67*v45, i.e. v45 = -v35.
v_mul_f32_e32 v35, v76, v51
v_mad_f32 v35, v45, v67, -v35
v_mul_f32_e32 v45, v67, v45
v_mad_f32 v45, v51, v76, -v45
; (v69, v71, v78) += v45 * (v23, v27, v31)
v_mad_f32 v78, v31, v45, v78
v_mad_f32 v71, v27, v45, v71
v_mac_f32_e32 v69, v23, v45
; (v13, v14, v15) += v35 * (v23, v27, v31)
v_mad_f32 v15, v31, v35, v15
v_mad_f32 v14, v27, v35, v14
v_mac_f32_e32 v13, v23, v35
BB6_126: ; %Flow1224
; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes that failed the compare in BB6_124.
s_or_b64 exec, exec, s[34:35]
; Join for the lanes parked in s[12:13]; stores v69, v71 and v78 to LDS at the
; addresses in v6, v7 and v12, then keeps only the lanes with v2 < 3 active.
; NOTE(review): presumably the staging step of a cross-lane reduction of the
; accumulators (v69, v71, v78) - confirm against the kernel source.
BB6_127: ; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[12:13]
s_mov_b32 m0, -1
; vcc = (3 > v2), signed compare.
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v6, v69
ds_write_b32 v7, v71
ds_write_b32 v12, v78
; exec &= vcc; s[4:5] = lanes switched off here (restored at BB6_133).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; Wait for the three LDS writes issued above.
s_waitcnt lgkmcnt(0)
; mask branch BB6_133
s_cbranch_execz BB6_133
; Computes the LDS byte address v31 = s15 + 4 * (v11 + (v2 << 6)), loads the
; dword there into v23, and guards the following summation with the signed
; test (v11 | 1) < (v11 + 8).
BB6_128: ; in Loop: Header=BB6_11 Depth=1
; v27 = v2 * 64 (kept for the second address computed in BB6_129).
v_lshlrev_b32_e32 v27, 6, v2
v_add_i32_e32 v23, vcc, v11, v27
v_lshlrev_b32_e32 v23, 2, v23
v_add_i32_e32 v31, vcc, s15, v23
s_mov_b32 m0, -1
ds_read_b32 v23, v31
v_add_i32_e32 v35, vcc, 8, v11
v_or_b32_e32 v45, 1, v11
v_cmp_lt_i32_e32 vcc, v45, v35
; exec &= vcc; s[8:9] = lanes switched off here (restored at BB6_130).
s_and_saveexec_b64 s[8:9], vcc
s_xor_b64 s[8:9], exec, s[8:9]
s_waitcnt lgkmcnt(0)
; mask branch BB6_130
s_cbranch_execz BB6_130
; Adds seven more LDS dwords to the value already in v23.
; Five come from v31 + 4, +8, +12, +16 and +28 bytes; two come from a second
; base v27 = s15 + 4 * ((v11 | 3) + (v2 << 6)) at +8 and +12 bytes.
; NOTE(review): if the low two bits of v11 are clear, the second base is
; v31 + 12, so the eight summed dwords are contiguous - confirm.
; v67..v70 are overwritten here (v69 was stored to LDS in BB6_127).
BB6_129: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
; ds_read2_b32 offsets are in dword units.
ds_read2_b32 v[67:68], v31 offset0:1 offset1:2
v_or_b32_e32 v35, 3, v11
v_add_i32_e32 v27, vcc, v35, v27
v_lshlrev_b32_e32 v27, 2, v27
ds_read2_b32 v[69:70], v31 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v67
v_add_i32_e32 v27, vcc, s15, v27
v_add_f32_e32 v23, v68, v23
ds_read2_b32 v[67:68], v27 offset0:2 offset1:3
ds_read_b32 v31, v31 offset:28
v_add_f32_e32 v23, v69, v23
v_add_f32_e32 v23, v70, v23
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v67, v23
v_add_f32_e32 v23, v68, v23
v_add_f32_e32 v23, v31, v23
; Join for BB6_128's guard, then set-up for the atomic update loop in BB6_131:
; element index v66 = v66 * 3 + v2, byte offset v[68:69] = sext(index) * 4,
; absolute address v[66:67] = s[28:29] + offset, and an initial load of the
; current dword into v69.
BB6_130: ; %._crit_edge.i26
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[8:9]
v_mul_lo_i32 v27, v66, 3
v_mov_b32_e32 v31, s29
; Buffer descriptor s[8:11] = { s[28:29], s[46:47] } for the load below.
s_mov_b64 s[8:9], s[28:29]
s_mov_b64 s[10:11], s[46:47]
v_add_i32_e32 v66, vcc, v27, v2
; Sign-extend the 32-bit index to 64 bits, then scale by 4.
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 2
; 64-bit add with carry: v[66:67] = s[28:29] + v[68:69].
v_add_i32_e32 v66, vcc, s28, v68
v_addc_u32_e32 v67, vcc, v69, v31, vcc
; The load addresses via the offset in v[68:69]; v69 is then overwritten with the result.
buffer_load_dword v69, v[68:69], s[8:11], 0 addr64
; s[8:9] is reused as the mask of lanes that have finished in BB6_131.
s_mov_b64 s[8:9], 0
s_waitcnt vmcnt(0)
; Compare-and-swap loop that adds v23 to the float at address v[66:67].
; Each pass proposes v68 = v23 + v69, where v69 is the last value seen, and
; a lane retires once the value returned in v70 equals the expected v69.
BB6_131: ; Parent Loop BB6_11 Depth=1
; => This Inner Loop Header: Depth=2
v_add_f32_e32 v68, v23, v69
; v[70:71] = { new value, expected old value } for the cmpswap.
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v70, v68
; With glc the pre-operation memory value is returned in v70.
buffer_atomic_cmpswap v[70:71], v[66:67], s[44:47], 0 addr64 glc
; The first write to v27 is immediately overwritten by the second, and v27 is
; not read inside this loop (left exactly as emitted by the compiler).
v_mov_b32_e32 v27, -1
v_mov_b32_e32 v27, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; Success when the returned value matches what this lane expected.
v_cmp_eq_u32_e32 vcc, v70, v69
; Accumulate finished lanes in s[8:9].
s_or_b64 s[8:9], vcc, s[8:9]
; Failed lanes retry with the freshly observed value.
v_mov_b32_e32 v69, v70
; Drop finished lanes from exec; loop while any lane is still active.
s_andn2_b64 exec, exec, s[8:9]
s_cbranch_execnz BB6_131
; BB#132: ; %Flow1222
; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes that retired from the loop.
s_or_b64 exec, exec, s[8:9]
BB6_133: ; %Flow1223
; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes switched off in BB6_127 (v2 >= 3).
s_or_b64 exec, exec, s[4:5]
; Join for the lanes parked in s[30:31], then per-lane unsigned test
; v65 > 0xffffff, i.e. whether any of bits 24..31 of v65 is set.
BB6_134: ; %Flow1232
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[30:31]
v_mov_b32_e32 v23, 0xffffff
; vcc = (0xffffff < v65), unsigned.
v_cmp_lt_u32_e32 vcc, v23, v65
; exec &= vcc; s[30:31] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB6_174
s_cbranch_execz BB6_174
; Reads an index v23 from LDS at v54 + 12, forms element index
; v66 = v23 * 8 + v1, and loads a 16-byte record into v[70:73] (base s[32:33])
; and an 8-byte record into v[67:68] (base s[36:37]) for that element.
; Then clears v69, v75, v78 and tests bit 24 of v65.
; NOTE(review): presumably per-atom coordinate/charge and parameter data -
; confirm against the kernel source.
BB6_135: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
ds_read_b32 v23, v54 offset:12
; Buffer descriptor s[8:11] = { s[32:33], s[46:47] }.
s_mov_b64 s[8:9], s[32:33]
s_mov_b64 s[10:11], s[46:47]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v27, 3, v23
v_add_i32_e32 v66, vcc, v27, v1
; Sign-extend the index; byte offset = index * 16.
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 4
buffer_load_dwordx4 v[70:73], v[68:69], s[8:11], 0 addr64
; Byte offset = index * 8 for the second load, base switched to s[36:37].
v_lshl_b64 v[74:75], v[66:67], 3
s_mov_b64 s[8:9], s[36:37]
buffer_load_dwordx2 v[67:68], v[74:75], s[8:11], 0 addr64
; v27 = (v65 >> 24) & 1
v_lshrrev_b32_e32 v27, 24, v65
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; v69 = v75 = v78 = 0
v_mov_b32_e32 v75, v69
v_mov_b32_e32 v78, v69
; exec &= vcc; s[34:35] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; Both buffer loads must have landed before any path continues.
s_waitcnt vmcnt(0)
; mask branch BB6_139
s_cbranch_execz BB6_139
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v31, v27, v35) = (v79 - v70, v80 - v71, v81 - v72), its squared length v45,
; and compares v45 against a per-lane masked copy of s22.
; v69, v75 and v78 are cleared again so lanes that skip BB6_137 hold zeros.
; NOTE(review): presumably a squared pair distance tested against a squared
; cut-off; the meaning of s[0:1], s[2:3], v41 and v23 is not visible here.
BB6_136: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 0 and v55 + 8.
ds_read2_b64 v[79:82], v55 offset1:1
; s[4:5] = ~(exec & s[0:1]) | (v41 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0, so v27 = v80 - v71.
v_subrev_f32_e32 v27, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v31, v70, v79
v_mul_f32_e32 v45, v27, v27
; v51 = 1.0 where the mask bit is set, else 0.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
; v45 = v27*v27 + v31*v31 + v35*v35
v_mac_f32_e32 v45, v31, v31
v_mov_b32_e32 v69, 0
v_mac_f32_e32 v45, v35, v35
; Threshold is s22 for masked-in lanes and 0 otherwise.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
v_mov_b32_e32 v75, v69
v_mov_b32_e32 v78, v69
; exec &= vcc; s[38:39] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_138
s_cbranch_execz BB6_138
; Straight-line arithmetic for the lanes that passed the compare in BB6_136.
; Inputs read here: v45 (squared length), (v31, v27, v35) difference vector,
; v82 (fourth LDS dword), v73, v67, v68, v9, v37, v19, s18, s19, s23, s26,
; s27, s50, s51 and one 64-bit LDS value at v56 + 0.
; Results: v8 and v5 are accumulated into; with scalar v45 and v51 = -v45,
; (v48, v49, v50) += v45 * (v31, v27, v35) and (v69, v75, v78) are SET (not
; accumulated) to -v45 * (v31, v27, v35).
; NOTE(review): presumably the energy (v8, v5) and force computation of one
; atom pair in a non-bonded kernel - confirm against the kernel source.
BB6_137: ; in Loop: Header=BB6_11 Depth=1
; Clamp the squared length from below before taking rsq.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v69 = v9 * v45, v74 = v69^2; v77 and v78 are built as polynomials in v74/v69
; and later combined as v78 * v37 / v77.
v_mul_f32_e32 v69, v9, v45
v_mul_f32_e32 v74, v69, v69
v_mov_b32_e32 v75, 0x3a92b707
v_mov_b32_e32 v76, 0x3c739487
v_madak_f32_e32 v75, v75, v74, 0x3ded3cb2
v_mad_f32 v77, v75, v74, 1.0
v_mov_b32_e32 v75, 0xb2951928
v_madak_f32_e32 v76, v76, v74, 0x3f01e2bc
v_madak_f32_e32 v75, v75, v74, 0xb85ffb93
v_mac_f32_e32 v77, v69, v76
v_mov_b32_e32 v76, 0x35c55945
v_madak_f32_e32 v76, v76, v74, 0x3a83ca0c
v_madak_f32_e32 v75, v75, v74, 0xbc9ded90
v_madak_f32_e32 v76, v76, v74, 0x3d8eaf3b
v_madak_f32_e32 v78, v75, v74, 0xbf409397
v_mac_f32_e32 v78, v69, v76
; v76 = 1.0 if bit 24 of v19 is set, else 0 (used as a multiplicative mask).
v_lshrrev_b32_e32 v69, 24, v19
v_and_b32_e32 v69, 1, v69
v_cmp_eq_u32_e32 vcc, 1, v69
; v69 = 1 / sqrt(v45)
v_rsq_f32_e32 v69, v45
s_mov_b32 m0, -1
v_cndmask_b32_e64 v76, 0, 1.0, vcc
; v51 = v73 * (fourth dword read from LDS in BB6_136)
v_mul_f32_e32 v51, v73, v82
; v79 = v69^2, v81 = v76 * v79^3
v_mul_f32_e32 v79, v69, v69
v_mul_f32_e32 v74, v79, v79
v_mul_f32_e32 v80, v76, v74
ds_read_b64 v[74:75], v56
v_mul_f32_e32 v81, v79, v80
v_mad_f32 v80, v80, v79, s23
v_mad_f32 v83, v81, v81, s26
v_mul_f32_e32 v80, 0xbe2aaaab, v80
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v68, v75
v_mul_f32_e32 v82, v81, v75
v_mad_f32 v82, v74, v67, -v82
v_mul_f32_e32 v74, v67, v74
v_mul_f32_e32 v75, v83, v75
v_mul_f32_e32 v74, v74, v80
v_mac_f32_e32 v74, 0x3daaaaaa, v75
; Masked accumulation into v8.
v_mac_f32_e32 v8, v76, v74
v_rcp_f32_e32 v74, v77
; v45 = s18 * v45 * v69 after the next two multiplies touching v45.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v75, v76, v79
v_mul_f32_e32 v45, v69, v45
v_mul_f32_e32 v74, v37, v74
v_mul_f32_e32 v74, v78, v74
v_mac_f32_e32 v74, v69, v75
; From here to the final select on v45: branch-free piecewise evaluation of a
; function of v45, with range selects held in s[4:5], s[8:9], s[10:11],
; s[12:13] and vcc.
; NOTE(review): the literals (e.g. 0x3f58560b, 0x3f8375d4) look like those of
; the fdlibm single-precision erf(); s27/s50/s51 are presumably bit masks whose
; values are not visible here - confirm.
v_and_b32_e32 v75, s27, v45
v_mov_b32_e32 v77, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v77, v75
v_mul_f32_e32 v77, v75, v75
v_rcp_f32_e32 v78, v77
v_add_f32_e32 v80, -1.0, v75
v_mov_b32_e32 v83, 0xbd777f97
v_mov_b32_e32 v84, 0xbf100000
v_cndmask_b32_e64 v78, v78, v80, s[4:5]
v_mov_b32_e32 v80, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v80, v75
v_cndmask_b32_e64 v77, v78, v77, s[8:9]
v_mov_b32_e32 v80, 0xc11d077e
v_mov_b32_e32 v78, 0x4036db6e
v_madak_f32_e32 v80, v80, v77, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v78, v75
v_mov_b32_e32 v78, 0xc3f1c275
v_madak_f32_e32 v78, v78, v77, 0xc480230b
v_madak_f32_e32 v80, v77, v80, 0xc3389ae7
v_madak_f32_e32 v78, v77, v78, 0xc41f6441
v_madak_f32_e32 v80, v77, v80, 0xc322658c
v_madak_f32_e32 v78, v77, v78, 0xc320a2ea
v_madak_f32_e32 v80, v77, v80, 0xc2798057
v_madak_f32_e32 v78, v77, v78, 0xc18e104b
v_madak_f32_e32 v80, v77, v80, 0xc128f022
v_madak_f32_e32 v78, v77, v78, 0xbf4c9dd4
v_madak_f32_e32 v80, v77, v80, 0xbf31a0b7
v_madak_f32_e32 v78, v77, v78, 0xbc21a092
v_madak_f32_e32 v80, v77, v80, 0xbc21a093
v_madak_f32_e32 v83, v83, v77, 0x40d23f7c
v_cndmask_b32_e32 v78, v78, v80, vcc
v_mov_b32_e32 v80, 0xc1b38712
v_madak_f32_e32 v80, v80, v77, 0x43ed43a7
v_madak_f32_e32 v83, v77, v83, 0x42d9451f
v_madak_f32_e32 v80, v77, v80, 0x451f90ce
v_madak_f32_e32 v83, v77, v83, 0x43d6810b
v_madak_f32_e32 v80, v77, v80, 0x4547fdbb
v_madak_f32_e32 v83, v77, v83, 0x442158c9
v_madak_f32_e32 v80, v77, v80, 0x44c01759
v_madak_f32_e32 v83, v77, v83, 0x43d9486f
v_madak_f32_e32 v80, v77, v80, 0x43a2e571
v_madak_f32_e32 v83, v77, v83, 0x4309a863
v_madak_f32_e32 v80, v77, v80, 0x41f2b459
v_madak_f32_e32 v83, v77, v83, 0x419d35ce
v_cndmask_b32_e32 v80, v80, v83, vcc
v_mov_b32_e32 v83, 0xbb0df9c0
v_madak_f32_e32 v83, v83, v77, 0x3d1151b3
v_madak_f32_e32 v83, v77, v83, 0xbde31cc2
v_madak_f32_e32 v83, v77, v83, 0x3ea2fe54
v_madak_f32_e32 v83, v77, v83, 0xbebe9208
v_madak_f32_e32 v83, v77, v83, 0x3ed46805
v_madak_f32_e32 v83, v77, v83, 0xbb1acdc6
v_cndmask_b32_e64 v78, v78, v83, s[4:5]
v_mov_b32_e32 v83, 0x3c445aa3
v_madak_f32_e32 v83, v83, v77, 0x3c5f6e13
v_madak_f32_e32 v83, v77, v83, 0x3e013307
v_madak_f32_e32 v83, v77, v83, 0x3d931ae7
v_madak_f32_e32 v83, v77, v83, 0x3f0a5785
v_madak_f32_e32 v83, v77, v83, 0x3dd9f331
v_cndmask_b32_e64 v80, v80, v83, s[4:5]
v_mov_b32_e32 v83, 0xb684e21a
v_madak_f32_e32 v83, v83, v77, 0x390aee49
v_madak_f32_e32 v83, v77, v83, 0x3ba68116
v_madak_f32_e32 v83, v77, v83, 0x3d852a63
v_madak_f32_e32 v83, v77, v83, 0x3ecbbbce
v_cndmask_b32_e64 v80, v80, v83, s[8:9]
v_mov_b32_e32 v83, 0xb7c756b1
v_madak_f32_e32 v83, v83, v77, 0xbbbd1489
v_madak_f32_e32 v83, v77, v83, 0xbce9528f
v_madak_f32_e32 v83, v77, v83, 0xbea66beb
v_madak_f32_e32 v83, v77, v83, 0x3e0375d4
v_mad_f32 v77, v77, v80, 1.0
v_and_b32_e32 v80, s50, v45
v_mad_f32 v84, v80, -v80, v84
v_cmp_gt_f32_e32 vcc, 0, v84
v_cndmask_b32_e64 v85, 0.5, -0.5, vcc
v_mov_b32_e32 v86, 0x3fb8aa3b
v_mac_f32_e32 v85, v86, v84
v_cvt_i32_f32_e32 v85, v85
v_mov_b32_e32 v91, 0xbf317180
v_mov_b32_e32 v93, 0xb717f7d1
v_mov_b32_e32 v96, 0xb5ddea0e
v_cvt_f32_i32_e32 v90, v85
v_mov_b32_e32 v97, 0x3331bb4c
v_mov_b32_e32 v99, 0x388ab355
v_mov_b32_e32 v100, 0xbb360b61
v_mad_f32 v92, v91, v90, v84
v_mad_f32 v94, v93, v90, v92
v_mul_f32_e32 v95, v94, v94
v_mad_f32 v98, v97, v95, v96
v_mad_f32 v98, v98, v95, v99
v_mad_f32 v98, v98, v95, v100
v_mov_b32_e32 v101, 0x3e2aaaab
v_mad_f32 v98, v98, v95, v101
v_mad_f32 v95, -v95, v98, v94
; Operands with magnitude above 0x6f800000 are pre-scaled by 0x2f800000
; before v_rcp and the scale is re-applied afterwards (same pattern below).
v_mov_b32_e32 v87, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v77|, v87
v_mov_b32_e32 v88, 0x2f800000
v_sub_f32_e32 v98, 2.0, v95
v_cndmask_b32_e32 v89, 1.0, v88, vcc
v_cmp_gt_f32_e64 vcc, |v98|, v87
v_mul_f32_e32 v77, v89, v77
v_cndmask_b32_e32 v102, 1.0, v88, vcc
v_mul_f32_e32 v98, v102, v98
v_rcp_f32_e32 v77, v77
v_rcp_f32_e32 v98, v98
v_cndmask_b32_e64 v78, v78, v83, s[8:9]
; The integer add of (v85 << 23) adjusts the float's exponent field directly.
v_lshlrev_b32_e32 v83, 23, v85
v_mul_f32_e32 v77, v77, v78
v_mul_f32_e32 v78, v95, v94
v_mul_f32_e32 v78, v98, v78
v_mul_f32_e32 v78, v78, v102
v_mad_f32 v78, -v90, v93, -v78
v_subrev_f32_e32 v78, v92, v78
v_sub_f32_e32 v78, 1.0, v78
v_add_i32_e32 v78, vcc, v78, v83
v_subrev_f32_e32 v83, v75, v80
v_mul_f32_e32 v85, v77, v89
v_add_f32_e32 v80, v75, v80
v_mad_f32 v80, v80, v83, v85
v_cmp_gt_f32_e32 vcc, 0, v80
v_cndmask_b32_e64 v83, 0.5, -0.5, vcc
v_mac_f32_e32 v83, v86, v80
v_cvt_i32_f32_e32 v83, v83
v_madak_f32_e32 v77, v89, v77, 0x3f58560b
v_cvt_f32_i32_e32 v86, v83
v_lshlrev_b32_e32 v83, 23, v83
v_mad_f32 v90, v91, v86, v80
v_mad_f32 v91, v93, v86, v90
v_mul_f32_e32 v92, v91, v91
v_mac_f32_e32 v96, v97, v92
v_mac_f32_e32 v99, v96, v92
v_mac_f32_e32 v100, v99, v92
v_mac_f32_e32 v101, v100, v92
v_mad_f32 v92, -v92, v101, v91
v_mul_f32_e32 v91, v92, v91
v_sub_f32_e32 v92, 2.0, v92
v_cmp_gt_f32_e64 vcc, |v92|, v87
v_cndmask_b32_e32 v94, 1.0, v88, vcc
v_mul_f32_e32 v92, v94, v92
v_rcp_f32_e32 v92, v92
v_mul_f32_e32 v91, v92, v91
v_mul_f32_e32 v91, v91, v94
v_mad_f32 v86, -v86, v93, -v91
; Range handling: 0 below 0xc2aeac4f, 0x7f800000 at or above 0x42b17218,
; and an unordered (NaN) input is passed through.
v_mov_b32_e32 v91, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v84, v91
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v78, 0, v78, vcc
v_cmp_lt_f32_e32 vcc, v84, v92
v_mov_b32_e32 v93, 0x7f800000
v_cndmask_b32_e32 v78, v93, v78, vcc
v_cmp_u_f32_e32 vcc, v84, v84
v_cndmask_b32_e32 v78, v78, v84, vcc
v_subrev_f32_e32 v84, v90, v86
v_sub_f32_e32 v84, 1.0, v84
v_add_i32_e32 v83, vcc, v84, v83
v_cmp_ge_f32_e32 vcc, v80, v91
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e64 s[10:11], v80, v92
v_cndmask_b32_e64 v83, v93, v83, s[10:11]
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v83, v80, vcc
v_mul_f32_e32 v78, v80, v78
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v80, v75
v_mov_b32_e32 v80, 0x31800000
v_cmp_gt_f32_e64 vcc, |v75|, v87
v_cmp_gt_f32_e64 s[12:13], v80, v75
v_cndmask_b32_e32 v80, 1.0, v88, vcc
v_mul_f32_e32 v75, v80, v75
v_rcp_f32_e32 v75, v75
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v75, v75, v78
v_mad_f32 v75, -v80, v75, 1.0
v_cndmask_b32_e64 v75, 1.0, v75, s[10:11]
v_cndmask_b32_e64 v75, v75, v77, s[4:5]
v_and_b32_e32 v77, s51, v45
v_or_b32_e32 v75, v77, v75
v_mad_f32 v77, v85, v45, v45
v_cndmask_b32_e64 v75, v75, v77, s[8:9]
v_mul_f32_e32 v77, 0x3f8375d4, v45
v_mac_f32_e32 v77, 0x41000000, v45
v_mul_f32_e32 v77, 0x3e000000, v77
v_cndmask_b32_e64 v75, v75, v77, s[12:13]
; Final select: an unordered (NaN) v45 is passed through unchanged.
v_cndmask_b32_e32 v45, v75, v45, vcc
; v45 = v69 * (v76 - v45) - s19 * v76, then v5 += v45 * v51.
v_subrev_f32_e32 v45, v45, v76
v_mul_f32_e32 v75, s19, v76
v_mad_f32 v45, v69, v45, -v75
v_mul_f32_e32 v69, v79, v81
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v74 - v82*v69 and v51 = v69*v82 - v74*v51, i.e. v51 = -v45.
v_mul_f32_e32 v45, v82, v69
v_mad_f32 v45, v51, v74, -v45
v_mul_f32_e32 v51, v74, v51
v_mad_f32 v51, v69, v82, -v51
; (v48, v49, v50) += -v51 * (v31, v27, v35)
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
; (v69, v75, v78) = -v45 * (v31, v27, v35)
v_mul_f32_e64 v78, v45, -v35
v_mul_f32_e64 v75, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB6_138: ; %Flow1220
; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes that failed the compare in BB6_136.
s_or_b64 exec, exec, s[38:39]
; Join for the lanes parked in s[34:35], then per-lane test of bit 25 of v65.
BB6_139: ; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes recorded in s[34:35].
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 25) & 1
v_lshrrev_b32_e32 v27, 25, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc (previous exec kept in s[4:5]); s[34:35] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_143
; Nothing to do if no lane has bit 25 set.
s_cbranch_execz BB6_143
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v27, v31, v35) = (v79 - v70, v80 - v71, v81 - v72), its squared length v45,
; and compares v45 against a per-lane masked copy of s22.
; NOTE(review): presumably a squared pair distance tested against a squared
; cut-off; the meaning of s[0:1], s[2:3], v64 and v23 is not visible here.
BB6_140: ; in Loop: Header=BB6_11 Depth=1
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 16*8 and v55 + 17*8 (offsets are in 8-byte units).
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
; s[4:5] = ~(exec & s[0:1]) | (v64 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0, so v31 = v80 - v71.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the mask bit is set, else 0.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
; v45 = v31*v31 + v27*v27 + v35*v35
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold is s22 for masked-in lanes and 0 otherwise.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; exec &= vcc; s[38:39] = lanes switched off by this test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_142
s_cbranch_execz BB6_142
BB6_141: ; in Loop: Header=BB6_11 Depth=1
; Per-lane arithmetic body; skipped by the s_cbranch_execz BB6_142 just above when no
; lane is active. Registers read before being written here (all set earlier in the
; kernel): v45, v9, v73, v82, v37, v67, v68, v19, v56, v27/v31/v35, s18, s19, s23, s26,
; s27, s50, s51. Accumulates into v8, v5, v78/v75/v69 and v44/v43/v42.
; NOTE(review): the arithmetic shape (powers of 1/v45, the -1/6 and ~1/12 factors, an
; erf-like function of s18*sqrt(v45)) looks like a Lennard-Jones plus Ewald-corrected
; Coulomb pair interaction with energy accumulation -- confirm against the kernel source.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7): lower clamp on the value later fed to v_rsq_f32.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9*v45, v76 = v80^2. The interleaved v_mov/v_madak/v_mac chains below build two
; polynomials in v80 (even powers via v76, odd powers as v80 * poly(v76)):
;   v82 = degree-4 polynomial with constant term 1.0
;   v83 = degree-6 polynomial with constant term 0xbf409397
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v73*v82; v82 is read here before being reused as a polynomial accumulator.
v_mul_f32_e32 v51, v73, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1/sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; v74 = ((v19 >> 25) & 1) ? 1.0 : 0 -- used as a multiplicative 0/1 mask.
v_lshrrev_b32_e32 v74, 25, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Load 8 bytes from LDS at v56+64 into v76 (low dword) / v77 (high dword). v76 is no
; longer needed as the polynomial variable after this point.
ds_read_b64 v[76:77], v56 offset:64
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 = 1/v45; v81 = v74/v45^2; v84 = v74/v45^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the ds_read_b64 result before touching v76/v77.
s_waitcnt lgkmcnt(0)
; With A = v67*v76 and B = v68*v77 (v76/v77 = the LDS pair):
;   v85 = A - v84*B
;   v76 = A*(-1/6)*(v84 + s23) + 0x3daaaaaa(~1/12)*B*(v84^2 + s26)
;   v8 += v74*v76
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1/v82 (reciprocal of the degree-4 polynomial).
v_rcp_f32_e32 v77, v82
; v45 = s18*v45*v79 = s18*sqrt(v45): argument x of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v74, v76
v_mul_f32_e32 v45, v79, v45
; v77 = v37*v83/v82 + v74*v79^3.
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v74, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation on x = v45 (the result replaces v45) ----
; NOTE(review): the breakpoints 0.84375, 1.25, ~2.857, 6.0 and 2^-28 together with the
; constants 0x3f58560b and 0x3f8375d4 match the fdlibm single-precision erf algorithm, so
; this is presumably an inlined erf(x) -- confirm against the kernel source.
; v76 = v45 & s27 (presumably |x|; s27 is set elsewhere -- TODO confirm it is 0x7fffffff).
v_and_b32_e32 v76, s27, v45
; s[4:5] = (v76 < 1.25), s[8:9] = (v76 < 0.84375), vcc = (v76 < 0x4036db6e ~ 2.857).
; v81 = polynomial argument: v76^2 if s[8:9], else v76 - 1 if s[4:5], else 1/v76^2.
; v87 = 2^96 (threshold used by the guarded divides further down).
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; All candidate numerator (-> v82) and denominator (-> v83) polynomials in v81 are
; evaluated unconditionally as v_madak chains; v_cndmask picks per lane using vcc,
; s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
; Numerator for the two large-|x| ranges: v83 chain where vcc, else v82 chain.
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
; Matching denominator: v86 chain where vcc, else v83 chain.
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
; Numerator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
; Denominator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
; Denominator override for lanes with v76 < 0.84375.
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; v83 = 1 + v81 * (selected denominator polynomial).
v_mad_f32 v83, v81, v83, 1.0
; v90 = v45 & s50 (s50 is set elsewhere; presumably a mantissa-truncation mask -- TODO
; confirm). v91 = -v90^2 - 0.5625 is the argument of the first inlined exponential.
; v88 = 2^-32; v89 = (|v83| > 2^96) ? 2^-32 : 1.0 is the scale of a guarded divide:
; v83 is multiplied by v89 before v_rcp_f32 and the quotient by v89 again afterwards.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First inlined exponential, e1 = exp(v91) -> v83 (NOTE(review): looks like a library
; expf expansion -- confirm): n = trunc(v91*0x3fb8aa3b(log2 e) +/- 0.5) in v92;
; r = v91 - n*ln2 using a hi/lo split (0xbf317180, 0xb717f7d1); polynomial in r^2;
; quotient via guarded v_rcp_f32; 2^n applied by integer-adding n<<23 to the result bits.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
; (interleaved) last numerator candidate; selected into v81 for lanes with s[8:9] below.
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
; v81 = selected numerator * rcp(scaled denominator).
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range handling for e1: 0 unless v91 >= 0xc2aeac4f (~-87.3); +inf (0x7f800000) unless
; v91 < 0x42b17218 (~88.7); v91 itself when v91 is NaN. v86/v92/v95 keep these constants
; for the second exponential.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v76 + v90)*(v90 - v76) + v96, with v96 = v81*v89
; (the rational-function quotient). e2 = exp(v90) -> v82 uses the same expansion; its
; v_mac chain updates the coefficient registers v97/v100/v101/v102 in place.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = v89*v81 + 0x3f58560b (~0.845): candidate result for lanes with s[4:5].
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = e1*e2.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28); vcc = (|v76| > 2^96) selects the
; scale v83 for the guarded divide by v76.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = isnan(v45); consumed by the final select of this section.
v_cmp_u_f32_e32 vcc, v45, v45
; v76 = 1 - (e1*e2)/v76; replaced by 1.0 where s[10:11] is clear and by v81 where s[4:5];
; then OR in (v45 & s51) (presumably the sign bit of x -- TODO confirm s51).
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Where s[8:9]: result = v45 + v96*v45. Where s[12:13]: result =
; 0.125*(0x3f8375d4*v45 + 8.0*v45). Where v45 is NaN: result = v45.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- Accumulation ----
; v45 = v79*(v74 - v45) - s19*v74, then v5 += v45*v51.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v80*v84. v45 = v51*v77 - v85*v74; v51 = v74*v85 - v77*v51 (same magnitude,
; opposite sign).
v_mul_f32_e32 v74, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v74
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v74, v85, -v51
; Accumulate along (v35, v31, v27): v78/v75/v69 += v51*(...), v44/v43/v42 += v45*(...).
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB6_142: ; %Flow1219
; in Loop: Header=BB6_11 Depth=1
; Join point targeted by "s_cbranch_execz BB6_142": OR the lane mask held in s[38:39]
; back into exec. s[38:39] is written earlier in the kernel; presumably it holds the
; lanes masked off for BB6_141, as in the sibling BB6_144/BB6_146 pair -- confirm.
s_or_b64 exec, exec, s[38:39]
BB6_143: ; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lanes held in s[34:35] back into exec, then open the next
; lane-masked region, entered only by lanes whose bit 26 of v65 is set.
s_or_b64 exec, exec, s[34:35]
; vcc = ((v65 >> 26) & 1) == 1
v_lshrrev_b32_e32 v27, 26, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = new exec ^ old exec = the lanes switched
; off here; BB6_147 ORs them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_147
; Skip the whole region when no lane remains active.
s_cbranch_execz BB6_147
BB6_144: ; in Loop: Header=BB6_11 Depth=1
; Loads four dwords from LDS, forms a 3-component difference vector and its squared
; length, and opens an inner lane-masked region (BB6_145) for lanes whose squared length
; is below a threshold.
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; Two 64-bit LDS reads into v79..v82. offset0/offset1 are in 8-byte units per the GCN
; ISA manual, i.e. v55+256 and v55+264 (16 contiguous bytes). v82 is consumed at the top
; of BB6_145.
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; Scalar mask s[4:5] = ~(exec & s[0:1]) | (v63 != v23) | (exec & s[2:3]), built while the
; LDS read is in flight. s[0:1] and s[2:3] are set earlier in the kernel.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before using v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71, v27 = v79 - v70, v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
; v45 = v31^2 + v27^2 + v35^2
v_mul_f32_e32 v45, v31, v31
; v51 = s[4:5] ? 1.0 : 0, then scaled by s22 below: lanes with a clear mask bit get a
; threshold of 0 and therefore fail the v45 < v51 compare unless v45 is negative (it is
; a sum of squares).
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; vcc = (v45 < v51). NOTE(review): presumably a squared-cutoff test with s22 = cutoff^2 --
; confirm against the kernel source.
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = old exec; exec &= vcc; s[38:39] = lanes switched off here (restored in BB6_146).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_146
; Skip BB6_145 when no lane passed the compare.
s_cbranch_execz BB6_146
BB6_145: ; in Loop: Header=BB6_11 Depth=1
; Per-lane arithmetic body of the region opened at the end of BB6_144; runs only for
; lanes where v45 < v51. Inputs from BB6_144: v45 (= v27^2 + v31^2 + v35^2), v27/v31/v35
; and v82 (fourth dword of the ds_read2_b64). Other registers read here (v9, v19, v37,
; v56, v67, v68, v73, s18, s19, s23, s26, s27, s50, s51) are set elsewhere in the kernel.
; Accumulates into v8, v5, v78/v75/v69 and v34/v33/v32.
; NOTE(review): the arithmetic shape (powers of 1/v45, the -1/6 and ~1/12 factors, an
; erf-like function of s18*sqrt(v45)) looks like a Lennard-Jones plus Ewald-corrected
; Coulomb pair interaction with energy accumulation -- confirm against the kernel source.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7): lower clamp on the value later fed to v_rsq_f32.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9*v45, v76 = v80^2. The interleaved v_mov/v_madak/v_mac chains below build two
; polynomials in v80 (even powers via v76, odd powers as v80 * poly(v76)):
;   v82 = degree-4 polynomial with constant term 1.0
;   v83 = degree-6 polynomial with constant term 0xbf409397
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v73*v82; v82 is read here before being reused as a polynomial accumulator.
v_mul_f32_e32 v51, v73, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1/sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; v74 = ((v19 >> 26) & 1) ? 1.0 : 0 -- used as a multiplicative 0/1 mask.
v_lshrrev_b32_e32 v74, 26, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Load 8 bytes from LDS at v56+128 into v76 (low dword) / v77 (high dword). v76 is no
; longer needed as the polynomial variable after this point.
ds_read_b64 v[76:77], v56 offset:128
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 = 1/v45; v81 = v74/v45^2; v84 = v74/v45^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the ds_read_b64 result before touching v76/v77.
s_waitcnt lgkmcnt(0)
; With A = v67*v76 and B = v68*v77 (v76/v77 = the LDS pair):
;   v85 = A - v84*B
;   v76 = A*(-1/6)*(v84 + s23) + 0x3daaaaaa(~1/12)*B*(v84^2 + s26)
;   v8 += v74*v76
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1/v82 (reciprocal of the degree-4 polynomial).
v_rcp_f32_e32 v77, v82
; v45 = s18*v45*v79 = s18*sqrt(v45): argument x of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v74, v76
v_mul_f32_e32 v45, v79, v45
; v77 = v37*v83/v82 + v74*v79^3.
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v74, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation on x = v45 (the result replaces v45) ----
; NOTE(review): the breakpoints 0.84375, 1.25, ~2.857, 6.0 and 2^-28 together with the
; constants 0x3f58560b and 0x3f8375d4 match the fdlibm single-precision erf algorithm, so
; this is presumably an inlined erf(x) -- confirm against the kernel source.
; v76 = v45 & s27 (presumably |x|; s27 is set elsewhere -- TODO confirm it is 0x7fffffff).
v_and_b32_e32 v76, s27, v45
; s[4:5] = (v76 < 1.25), s[8:9] = (v76 < 0.84375), vcc = (v76 < 0x4036db6e ~ 2.857).
; v81 = polynomial argument: v76^2 if s[8:9], else v76 - 1 if s[4:5], else 1/v76^2.
; v87 = 2^96 (threshold used by the guarded divides further down).
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; All candidate numerator (-> v82) and denominator (-> v83) polynomials in v81 are
; evaluated unconditionally as v_madak chains; v_cndmask picks per lane using vcc,
; s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
; Numerator for the two large-|x| ranges: v83 chain where vcc, else v82 chain.
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
; Matching denominator: v86 chain where vcc, else v83 chain.
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
; Numerator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
; Denominator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
; Denominator override for lanes with v76 < 0.84375.
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; v83 = 1 + v81 * (selected denominator polynomial).
v_mad_f32 v83, v81, v83, 1.0
; v90 = v45 & s50 (s50 is set elsewhere; presumably a mantissa-truncation mask -- TODO
; confirm). v91 = -v90^2 - 0.5625 is the argument of the first inlined exponential.
; v88 = 2^-32; v89 = (|v83| > 2^96) ? 2^-32 : 1.0 is the scale of a guarded divide:
; v83 is multiplied by v89 before v_rcp_f32 and the quotient by v89 again afterwards.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First inlined exponential, e1 = exp(v91) -> v83 (NOTE(review): looks like a library
; expf expansion -- confirm): n = trunc(v91*0x3fb8aa3b(log2 e) +/- 0.5) in v92;
; r = v91 - n*ln2 using a hi/lo split (0xbf317180, 0xb717f7d1); polynomial in r^2;
; quotient via guarded v_rcp_f32; 2^n applied by integer-adding n<<23 to the result bits.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
; (interleaved) last numerator candidate; selected into v81 for lanes with s[8:9] below.
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
; v81 = selected numerator * rcp(scaled denominator).
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range handling for e1: 0 unless v91 >= 0xc2aeac4f (~-87.3); +inf (0x7f800000) unless
; v91 < 0x42b17218 (~88.7); v91 itself when v91 is NaN. v86/v92/v95 keep these constants
; for the second exponential.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v76 + v90)*(v90 - v76) + v96, with v96 = v81*v89
; (the rational-function quotient). e2 = exp(v90) -> v82 uses the same expansion; its
; v_mac chain updates the coefficient registers v97/v100/v101/v102 in place.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = v89*v81 + 0x3f58560b (~0.845): candidate result for lanes with s[4:5].
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = e1*e2.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28); vcc = (|v76| > 2^96) selects the
; scale v83 for the guarded divide by v76.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = isnan(v45); consumed by the final select of this section.
v_cmp_u_f32_e32 vcc, v45, v45
; v76 = 1 - (e1*e2)/v76; replaced by 1.0 where s[10:11] is clear and by v81 where s[4:5];
; then OR in (v45 & s51) (presumably the sign bit of x -- TODO confirm s51).
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Where s[8:9]: result = v45 + v96*v45. Where s[12:13]: result =
; 0.125*(0x3f8375d4*v45 + 8.0*v45). Where v45 is NaN: result = v45.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- Accumulation ----
; v45 = v79*(v74 - v45) - s19*v74, then v5 += v45*v51.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v80*v84. v45 = v51*v77 - v85*v74; v51 = v74*v85 - v77*v51 (same magnitude,
; opposite sign).
v_mul_f32_e32 v74, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v74
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v74, v85, -v51
; Accumulate along (v35, v31, v27): v78/v75/v69 += v51*(...), v34/v33/v32 += v45*(...).
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB6_146: ; %Flow1218
; in Loop: Header=BB6_11 Depth=1
; Join point for the BB6_145 region: OR the lanes that BB6_144 saved in s[38:39] (those
; that failed the v45 < v51 compare) back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_147: ; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lanes held in s[34:35] (saved by BB6_143) back into exec, then open
; the next lane-masked region, entered only by lanes whose bit 27 of v65 is set.
s_or_b64 exec, exec, s[34:35]
; vcc = ((v65 >> 27) & 1) == 1
v_lshrrev_b32_e32 v27, 27, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = new exec ^ old exec = the lanes switched
; off here; BB6_151 ORs them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_151
; Skip the whole region when no lane remains active.
s_cbranch_execz BB6_151
BB6_148: ; in Loop: Header=BB6_11 Depth=1
; Loads four dwords from LDS, forms a 3-component difference vector and its squared
; length, and opens an inner lane-masked region (BB6_149) for lanes whose squared length
; is below a threshold.
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; Two 64-bit LDS reads into v79..v82. offset0/offset1 are in 8-byte units per the GCN
; ISA manual, i.e. v55+384 and v55+392 (16 contiguous bytes). v82 is consumed at the top
; of BB6_149.
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; Scalar mask s[4:5] = ~(exec & s[0:1]) | (v62 != v23) | (exec & s[2:3]), built while the
; LDS read is in flight. s[0:1] and s[2:3] are set earlier in the kernel.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before using v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71, v27 = v79 - v70, v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
; v45 = v31^2 + v27^2 + v35^2
v_mul_f32_e32 v45, v31, v31
; v51 = s[4:5] ? 1.0 : 0, then scaled by s22 below: lanes with a clear mask bit get a
; threshold of 0 and therefore fail the v45 < v51 compare unless v45 is negative (it is
; a sum of squares).
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; vcc = (v45 < v51). NOTE(review): presumably a squared-cutoff test with s22 = cutoff^2 --
; confirm against the kernel source.
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = old exec; exec &= vcc; s[38:39] = lanes switched off here (restored in BB6_150).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_150
; Skip BB6_149 when no lane passed the compare.
s_cbranch_execz BB6_150
BB6_149: ; in Loop: Header=BB6_11 Depth=1
; Per-lane arithmetic body of the region opened at the end of BB6_148; runs only for
; lanes where v45 < v51. Inputs from BB6_148: v45 (= v27^2 + v31^2 + v35^2), v27/v31/v35
; and v82 (fourth dword of the ds_read2_b64). Other registers read here (v9, v19, v37,
; v56, v67, v68, v73, s18, s19, s23, s26, s27, s50, s51) are set elsewhere in the kernel.
; Accumulates into v8, v5, v78/v75/v69 and v30/v29/v28.
; NOTE(review): the arithmetic shape (powers of 1/v45, the -1/6 and ~1/12 factors, an
; erf-like function of s18*sqrt(v45)) looks like a Lennard-Jones plus Ewald-corrected
; Coulomb pair interaction with energy accumulation -- confirm against the kernel source.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7): lower clamp on the value later fed to v_rsq_f32.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9*v45, v76 = v80^2. The interleaved v_mov/v_madak/v_mac chains below build two
; polynomials in v80 (even powers via v76, odd powers as v80 * poly(v76)):
;   v82 = degree-4 polynomial with constant term 1.0
;   v83 = degree-6 polynomial with constant term 0xbf409397
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v73*v82; v82 is read here before being reused as a polynomial accumulator.
v_mul_f32_e32 v51, v73, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1/sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; v74 = ((v19 >> 27) & 1) ? 1.0 : 0 -- used as a multiplicative 0/1 mask.
v_lshrrev_b32_e32 v74, 27, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Load 8 bytes from LDS at v56+192 into v76 (low dword) / v77 (high dword). v76 is no
; longer needed as the polynomial variable after this point.
ds_read_b64 v[76:77], v56 offset:192
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 = 1/v45; v81 = v74/v45^2; v84 = v74/v45^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the ds_read_b64 result before touching v76/v77.
s_waitcnt lgkmcnt(0)
; With A = v67*v76 and B = v68*v77 (v76/v77 = the LDS pair):
;   v85 = A - v84*B
;   v76 = A*(-1/6)*(v84 + s23) + 0x3daaaaaa(~1/12)*B*(v84^2 + s26)
;   v8 += v74*v76
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1/v82 (reciprocal of the degree-4 polynomial).
v_rcp_f32_e32 v77, v82
; v45 = s18*v45*v79 = s18*sqrt(v45): argument x of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v74, v76
v_mul_f32_e32 v45, v79, v45
; v77 = v37*v83/v82 + v74*v79^3.
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v74, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation on x = v45 (the result replaces v45) ----
; NOTE(review): the breakpoints 0.84375, 1.25, ~2.857, 6.0 and 2^-28 together with the
; constants 0x3f58560b and 0x3f8375d4 match the fdlibm single-precision erf algorithm, so
; this is presumably an inlined erf(x) -- confirm against the kernel source.
; v76 = v45 & s27 (presumably |x|; s27 is set elsewhere -- TODO confirm it is 0x7fffffff).
v_and_b32_e32 v76, s27, v45
; s[4:5] = (v76 < 1.25), s[8:9] = (v76 < 0.84375), vcc = (v76 < 0x4036db6e ~ 2.857).
; v81 = polynomial argument: v76^2 if s[8:9], else v76 - 1 if s[4:5], else 1/v76^2.
; v87 = 2^96 (threshold used by the guarded divides further down).
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; All candidate numerator (-> v82) and denominator (-> v83) polynomials in v81 are
; evaluated unconditionally as v_madak chains; v_cndmask picks per lane using vcc,
; s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
; Numerator for the two large-|x| ranges: v83 chain where vcc, else v82 chain.
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
; Matching denominator: v86 chain where vcc, else v83 chain.
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
; Numerator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
; Denominator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
; Denominator override for lanes with v76 < 0.84375.
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; v83 = 1 + v81 * (selected denominator polynomial).
v_mad_f32 v83, v81, v83, 1.0
; v90 = v45 & s50 (s50 is set elsewhere; presumably a mantissa-truncation mask -- TODO
; confirm). v91 = -v90^2 - 0.5625 is the argument of the first inlined exponential.
; v88 = 2^-32; v89 = (|v83| > 2^96) ? 2^-32 : 1.0 is the scale of a guarded divide:
; v83 is multiplied by v89 before v_rcp_f32 and the quotient by v89 again afterwards.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First inlined exponential, e1 = exp(v91) -> v83 (NOTE(review): looks like a library
; expf expansion -- confirm): n = trunc(v91*0x3fb8aa3b(log2 e) +/- 0.5) in v92;
; r = v91 - n*ln2 using a hi/lo split (0xbf317180, 0xb717f7d1); polynomial in r^2;
; quotient via guarded v_rcp_f32; 2^n applied by integer-adding n<<23 to the result bits.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
; (interleaved) last numerator candidate; selected into v81 for lanes with s[8:9] below.
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
; v81 = selected numerator * rcp(scaled denominator).
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range handling for e1: 0 unless v91 >= 0xc2aeac4f (~-87.3); +inf (0x7f800000) unless
; v91 < 0x42b17218 (~88.7); v91 itself when v91 is NaN. v86/v92/v95 keep these constants
; for the second exponential.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v76 + v90)*(v90 - v76) + v96, with v96 = v81*v89
; (the rational-function quotient). e2 = exp(v90) -> v82 uses the same expansion; its
; v_mac chain updates the coefficient registers v97/v100/v101/v102 in place.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = v89*v81 + 0x3f58560b (~0.845): candidate result for lanes with s[4:5].
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = e1*e2.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28); vcc = (|v76| > 2^96) selects the
; scale v83 for the guarded divide by v76.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = isnan(v45); consumed by the final select of this section.
v_cmp_u_f32_e32 vcc, v45, v45
; v76 = 1 - (e1*e2)/v76; replaced by 1.0 where s[10:11] is clear and by v81 where s[4:5];
; then OR in (v45 & s51) (presumably the sign bit of x -- TODO confirm s51).
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Where s[8:9]: result = v45 + v96*v45. Where s[12:13]: result =
; 0.125*(0x3f8375d4*v45 + 8.0*v45). Where v45 is NaN: result = v45.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- Accumulation ----
; v45 = v79*(v74 - v45) - s19*v74, then v5 += v45*v51.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v80*v84. v45 = v51*v77 - v85*v74; v51 = v74*v85 - v77*v51 (same magnitude,
; opposite sign).
v_mul_f32_e32 v74, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v74
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v74, v85, -v51
; Accumulate along (v35, v31, v27): v78/v75/v69 += v51*(...), v30/v29/v28 += v45*(...).
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
BB6_150: ; %Flow1217
; in Loop: Header=BB6_11 Depth=1
; Join point for the BB6_149 region: OR the lanes that BB6_148 saved in s[38:39] (those
; that failed the v45 < v51 compare) back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_151: ; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lanes held in s[34:35] (saved by BB6_147) back into exec, then open
; the next lane-masked region, entered only by lanes whose bit 28 of v65 is set.
s_or_b64 exec, exec, s[34:35]
; vcc = ((v65 >> 28) & 1) == 1
v_lshrrev_b32_e32 v27, 28, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec; exec &= vcc. s[34:35] = new exec ^ old exec = the lanes switched
; off here, to be ORed back into exec at BB6_155.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_155
; Skip the whole region when no lane remains active.
s_cbranch_execz BB6_155
BB6_152: ; in Loop: Header=BB6_11 Depth=1
; Loads four dwords from LDS, forms a 3-component difference vector and its squared
; length, and opens an inner lane-masked region (BB6_153) for lanes whose squared length
; is below a threshold.
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; Two 64-bit LDS reads into v79..v82. offset0/offset1 are in 8-byte units per the GCN
; ISA manual, i.e. v55+512 and v55+520 (16 contiguous bytes). v82 is consumed at the top
; of BB6_153.
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; Scalar mask s[4:5] = ~(exec & s[0:1]) | (v61 != v23) | (exec & s[2:3]), built while the
; LDS read is in flight. s[0:1] and s[2:3] are set earlier in the kernel.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before using v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71, v27 = v79 - v70, v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
; v45 = v31^2 + v27^2 + v35^2
v_mul_f32_e32 v45, v31, v31
; v51 = s[4:5] ? 1.0 : 0, then scaled by s22 below: lanes with a clear mask bit get a
; threshold of 0 and therefore fail the v45 < v51 compare unless v45 is negative (it is
; a sum of squares).
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s22, v51
; vcc = (v45 < v51). NOTE(review): presumably a squared-cutoff test with s22 = cutoff^2 --
; confirm against the kernel source.
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = old exec; exec &= vcc; s[38:39] = lanes switched off here (restored in BB6_154).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_154
; Skip BB6_153 when no lane passed the compare.
s_cbranch_execz BB6_154
BB6_153: ; in Loop: Header=BB6_11 Depth=1
; Per-lane arithmetic body of the region opened at the end of BB6_152; runs only for
; lanes where v45 < v51. Inputs from BB6_152: v45 (= v27^2 + v31^2 + v35^2), v27/v31/v35
; and v82 (fourth dword of the ds_read2_b64). Other registers read here (v9, v19, v37,
; v56, v67, v68, v73, s18, s19, s23, s26, s27, s50, s51) are set elsewhere in the kernel.
; Accumulates into v8, v5, v78/v75/v69 and v26/v25/v24.
; NOTE(review): the arithmetic shape (powers of 1/v45, the -1/6 and ~1/12 factors, an
; erf-like function of s18*sqrt(v45)) looks like a Lennard-Jones plus Ewald-corrected
; Coulomb pair interaction with energy accumulation -- confirm against the kernel source.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7): lower clamp on the value later fed to v_rsq_f32.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9*v45, v76 = v80^2. The interleaved v_mov/v_madak/v_mac chains below build two
; polynomials in v80 (even powers via v76, odd powers as v80 * poly(v76)):
;   v82 = degree-4 polynomial with constant term 1.0
;   v83 = degree-6 polynomial with constant term 0xbf409397
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v73*v82; v82 is read here before being reused as a polynomial accumulator.
v_mul_f32_e32 v51, v73, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1/sqrt(v45)
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
; m0 = -1 ahead of the LDS read below.
s_mov_b32 m0, -1
; v74 = ((v19 >> 28) & 1) ? 1.0 : 0 -- used as a multiplicative 0/1 mask.
v_lshrrev_b32_e32 v74, 28, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; Load 8 bytes from LDS at v56+256 into v76 (low dword) / v77 (high dword). v76 is no
; longer needed as the polynomial variable after this point.
ds_read_b64 v[76:77], v56 offset:256
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2 = 1/v45; v81 = v74/v45^2; v84 = v74/v45^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v84, v80, v81
; Wait for the ds_read_b64 result before touching v76/v77.
s_waitcnt lgkmcnt(0)
; With A = v67*v76 and B = v68*v77 (v76/v77 = the LDS pair):
;   v85 = A - v84*B
;   v76 = A*(-1/6)*(v84 + s23) + 0x3daaaaaa(~1/12)*B*(v84^2 + s26)
;   v8 += v74*v76
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1/v82 (reciprocal of the degree-4 polynomial).
v_rcp_f32_e32 v77, v82
; v45 = s18*v45*v79 = s18*sqrt(v45): argument x of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mac_f32_e32 v8, v74, v76
v_mul_f32_e32 v45, v79, v45
; v77 = v37*v83/v82 + v74*v79^3.
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v74, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; ---- Piecewise evaluation on x = v45 (the result replaces v45) ----
; NOTE(review): the breakpoints 0.84375, 1.25, ~2.857, 6.0 and 2^-28 together with the
; constants 0x3f58560b and 0x3f8375d4 match the fdlibm single-precision erf algorithm, so
; this is presumably an inlined erf(x) -- confirm against the kernel source.
; v76 = v45 & s27 (presumably |x|; s27 is set elsewhere -- TODO confirm it is 0x7fffffff).
v_and_b32_e32 v76, s27, v45
; s[4:5] = (v76 < 1.25), s[8:9] = (v76 < 0.84375), vcc = (v76 < 0x4036db6e ~ 2.857).
; v81 = polynomial argument: v76^2 if s[8:9], else v76 - 1 if s[4:5], else 1/v76^2.
; v87 = 2^96 (threshold used by the guarded divides further down).
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; All candidate numerator (-> v82) and denominator (-> v83) polynomials in v81 are
; evaluated unconditionally as v_madak chains; v_cndmask picks per lane using vcc,
; s[4:5] and s[8:9].
v_mov_b32_e32 v83, 0xc11d077e
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
; Numerator for the two large-|x| ranges: v83 chain where vcc, else v82 chain.
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
; Matching denominator: v86 chain where vcc, else v83 chain.
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
; Numerator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
; Denominator override for lanes with v76 < 1.25.
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
; Denominator override for lanes with v76 < 0.84375.
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; v83 = 1 + v81 * (selected denominator polynomial).
v_mad_f32 v83, v81, v83, 1.0
; v90 = v45 & s50 (s50 is set elsewhere; presumably a mantissa-truncation mask -- TODO
; confirm). v91 = -v90^2 - 0.5625 is the argument of the first inlined exponential.
; v88 = 2^-32; v89 = (|v83| > 2^96) ? 2^-32 : 1.0 is the scale of a guarded divide:
; v83 is multiplied by v89 before v_rcp_f32 and the quotient by v89 again afterwards.
v_and_b32_e32 v90, s50, v45
v_mov_b32_e32 v91, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First inlined exponential, e1 = exp(v91) -> v83 (NOTE(review): looks like a library
; expf expansion -- confirm): n = trunc(v91*0x3fb8aa3b(log2 e) +/- 0.5) in v92;
; r = v91 - n*ln2 using a hi/lo split (0xbf317180, 0xb717f7d1); polynomial in r^2;
; quotient via guarded v_rcp_f32; 2^n applied by integer-adding n<<23 to the result bits.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
; (interleaved) last numerator candidate; selected into v81 for lanes with s[8:9] below.
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v82, 0xbf317180
; v81 = selected numerator * rcp(scaled denominator).
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range handling for e1: 0 unless v91 >= 0xc2aeac4f (~-87.3); +inf (0x7f800000) unless
; v91 < 0x42b17218 (~88.7); v91 itself when v91 is NaN. v86/v92/v95 keep these constants
; for the second exponential.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v76 + v90)*(v90 - v76) + v96, with v96 = v81*v89
; (the rational-function quotient). e2 = exp(v90) -> v82 uses the same expansion; its
; v_mac chain updates the coefficient registers v97/v100/v101/v102 in place.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = v89*v81 + 0x3f58560b (~0.845): candidate result for lanes with s[4:5].
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; v82 = e1*e2.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0), s[12:13] = (v76 < 2^-28); vcc = (|v76| > 2^96) selects the
; scale v83 for the guarded divide by v76.
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = isnan(v45); consumed by the final select of this section.
v_cmp_u_f32_e32 vcc, v45, v45
; v76 = 1 - (e1*e2)/v76; replaced by 1.0 where s[10:11] is clear and by v81 where s[4:5];
; then OR in (v45 & s51) (presumably the sign bit of x -- TODO confirm s51).
v_mul_f32_e32 v76, v76, v82
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Where s[8:9]: result = v45 + v96*v45. Where s[12:13]: result =
; 0.125*(0x3f8375d4*v45 + 8.0*v45). Where v45 is NaN: result = v45.
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
v_cndmask_b32_e32 v45, v76, v45, vcc
; ---- Accumulation ----
; v45 = v79*(v74 - v45) - s19*v74, then v5 += v45*v51.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v80*v84. v45 = v51*v77 - v85*v74; v51 = v74*v85 - v77*v51 (same magnitude,
; opposite sign).
v_mul_f32_e32 v74, v80, v84
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v74
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v74, v85, -v51
; Accumulate along (v35, v31, v27): v78/v75/v69 += v51*(...), v26/v25/v24 += v45*(...).
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
BB6_154: ; %Flow1216
; in Loop: Header=BB6_11 Depth=1
; Join point: OR the lane mask kept in s[38:39] back into exec.
s_or_b64 exec, exec, s[38:39]
BB6_155: ; in Loop: Header=BB6_11 Depth=1
; Join point for the mask in s[34:35], then a per-lane test of bit 29 of v65.
s_or_b64 exec, exec, s[34:35]
v_lshrrev_b32_e32 v27, 29, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = exec before narrowing, exec &= vcc.
s_and_saveexec_b64 s[4:5], vcc
; s[34:35] = lanes switched off by the test; OR-ed back into exec at BB6_159.
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_159
; Skip the whole guarded region when no lane has the bit set.
s_cbranch_execz BB6_159
BB6_156: ; in Loop: Header=BB6_11 Depth=1
; Loads four dwords from LDS, forms three coordinate differences and their sum of
; squares (v45), and keeps only the lanes where v45 is below a per-lane limit.
; NOTE(review): m0 = -1 presumably disables the LDS address clamp for the DS read -- confirm for this target.
s_mov_b32 m0, -1
; Two 64-bit reads at v55 + 80*8 and v55 + 81*8 (offsets are in 8-byte units) -> v79..v82.
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; Build the lane mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v60 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before using v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70.
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the mask is set, else 0 (so masked-off lanes get a limit of 0).
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72.
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2.
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Limit = s22 * v51; lanes pass when v45 < limit.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] = lanes to restore at BB6_158.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_158
s_cbranch_execz BB6_158
BB6_157: ; in Loop: Header=BB6_11 Depth=1
; Straight-line body run by the lanes that passed the v45 < limit test in BB6_156.
; Inputs produced in BB6_156: v27/v31/v35 (coordinate differences), v45 (their sum
; of squares) and v82 (4th dword of the LDS read).
; Accumulates into v8, v5, v69/v75/v78 and v20/v21/v22.
; NOTE(review): the constant tables below match the fdlibm-style single-precision
; erf() and exp() expansions, so this is presumably an inlined erf()/exp() pair
; feeding a pair-interaction term -- confirm against the kernel source.
; Clamp v45 from below (0x34cd15ae is about 3.8e-7) before the rsq further down.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9 * v45, v76 = v80^2; two polynomial pairs in v76/v80 are built in v82 and v83.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v73 * v82 (v82 is reused as a polynomial accumulator right after).
v_mul_f32_e32 v51, v73, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1/sqrt(v45).
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
; v74 = bit 29 of v19, converted to 0.0 / 1.0 below.
v_lshrrev_b32_e32 v74, 29, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; 64-bit LDS read at v56 + 320 bytes -> v76, v77 (overwrites the v76 polynomial variable).
ds_read_b64 v[76:77], v56 offset:320
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v74 * v80^2, v84 = v74 * v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
; 0xbe2aaaab is about -1/6 and 0x3daaaaaa about 1/12.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1 / v82 (denominator polynomial built above).
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
; v8 += v74 * v76.
v_mac_f32_e32 v8, v74, v76
; v45 = s18 * v45 * rsq(v45): argument x of the function evaluated below.
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v74, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; NOTE(review): s27, s50 and s51 are bit masks set outside this chunk; presumably
; abs-value, low-bit truncation and sign masks respectively -- confirm.
v_and_b32_e32 v76, s27, v45
; s[4:5] = (v76 < 1.25).
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
; 0x6f800000 = 2^96: threshold used by the scaled reciprocal sequences below.
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
; s[8:9] = (v76 < 0.84375).
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
; Polynomial variable v81: v76^2, v76 - 1 or 1/v76^2 depending on the range of v76.
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v83, 0xc11d077e
; vcc = (v76 < 0x4036db6e, about 2.857).
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
; Numerator (v82) and denominator (v83) polynomials for every range are evaluated
; unconditionally and picked with v_cndmask.
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator v83 = 1 + v81 * poly.
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
; v91 = -v90^2 - 0.5625: first exponential argument.
v_mov_b32_e32 v91, 0xbf100000
; Scaled division: when |den| > 2^96 the denominator is pre-multiplied by 2^-32
; (0x2f800000) and the quotient is multiplied by the same factor afterwards.
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First exponential: n = int(v91 * log2(e) +/- 0.5), with the sign of the 0.5 following v91.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Range reduction v91 - n*ln2, with ln2 split into 0xbf317180 and 0xb717f7d1 (both negated).
v_mov_b32_e32 v82, 0xbf317180
; v81 = numerator / (scaled) denominator; the scale factor v89 is applied at the uses.
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
; The polynomial coefficients stay in v97/v98/v100/v101/v102 and are consumed
; (overwritten in place by v_mac) by the second exponential below.
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; Adding n << 23 to the float bit pattern multiplies the value by 2^n.
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range guards: 0 below 0xc2aeac4f (about -87.3), +inf (0x7f800000) at or above
; 0x42b17218 (about 88.7), and a NaN argument is passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v90 + v76) * (v90 - v76) + v81 * v89.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = v89 * v81 + 0x3f58560b (about 0.845): result for the middle range.
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; Product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0); s[12:13] = (v76 < 2^-28).
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = NaN check on the argument v45, consumed by the final select.
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
; Large-argument result: 1 - exp_product / v76, replaced by exactly 1.0 when v76 >= 6.
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
; OR in the bits of v45 selected by s51 (presumably the sign bit).
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Small-argument result: v45 + v45 * (num/den).
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
; Tiny-argument result: 0.125 * (8 * v45 + 0x3f8375d4 * v45).
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
; NaN argument is returned unchanged.
v_cndmask_b32_e32 v45, v76, v45, vcc
; v45 = v74 - f(x); then v45 = v79 * v45 - s19 * v74.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v80, v84
; v5 += v45 * v51.
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v74
; v45 = v51*v77 - v85*v74 and v51 = v74*v85 - v77*v51: the same scalar with opposite signs.
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v74, v85, -v51
; Scale the difference vector (v27, v31, v35) by each scalar and accumulate into
; v69/v75/v78 and v20/v21/v22.
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v22, v35, v45, v22
v_mad_f32 v21, v31, v45, v21
v_mac_f32_e32 v20, v27, v45
BB6_158: ; %Flow1215
; in Loop: Header=BB6_11 Depth=1
; Join point: re-enable the lanes parked in s[38:39] by BB6_156.
s_or_b64 exec, exec, s[38:39]
BB6_159: ; in Loop: Header=BB6_11 Depth=1
; Join point for s[34:35], then a per-lane test of bit 30 of v65.
s_or_b64 exec, exec, s[34:35]
v_lshrrev_b32_e32 v27, 30, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = exec before narrowing, exec &= vcc.
s_and_saveexec_b64 s[4:5], vcc
; s[34:35] = lanes switched off by the test; OR-ed back into exec at BB6_163.
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_163
s_cbranch_execz BB6_163
BB6_160: ; in Loop: Header=BB6_11 Depth=1
; Same shape as BB6_156, for the LDS entry at offsets 96/97 and index register v59:
; load four dwords, form three coordinate differences and their sum of squares
; (v45), and keep only the lanes where v45 is below a per-lane limit.
; NOTE(review): m0 = -1 presumably disables the LDS address clamp for the DS read -- confirm for this target.
s_mov_b32 m0, -1
; Two 64-bit reads at v55 + 96*8 and v55 + 97*8 (offsets are in 8-byte units) -> v79..v82.
ds_read2_b64 v[79:82], v55 offset0:96 offset1:97
; Build the lane mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v59 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v59, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before using v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70.
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the mask is set, else 0 (so masked-off lanes get a limit of 0).
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72.
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2.
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Limit = s22 * v51; lanes pass when v45 < limit.
v_mul_f32_e32 v51, s22, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Narrow exec to the passing lanes; s[38:39] = lanes to restore at BB6_162.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB6_162
s_cbranch_execz BB6_162
BB6_161: ; in Loop: Header=BB6_11 Depth=1
; Straight-line body run by the lanes that passed the v45 < limit test in BB6_160.
; Inputs produced in BB6_160: v27/v31/v35 (coordinate differences), v45 (their sum
; of squares) and v82 (4th dword of the LDS read).
; Accumulates into v8, v5, v69/v75/v78 and v16/v17/v18.
; NOTE(review): the constant tables below match the fdlibm-style single-precision
; erf() and exp() expansions, so this is presumably an inlined erf()/exp() pair
; feeding a pair-interaction term -- confirm against the kernel source.
; Clamp v45 from below (0x34cd15ae is about 3.8e-7) before the rsq further down.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v80 = v9 * v45, v76 = v80^2; two polynomial pairs in v76/v80 are built in v82 and v83.
v_mul_f32_e32 v80, v9, v45
v_mul_f32_e32 v76, v80, v80
v_mov_b32_e32 v77, 0x3a92b707
v_mov_b32_e32 v81, 0x3c739487
v_madak_f32_e32 v77, v77, v76, 0x3ded3cb2
; v51 = v73 * v82 (v82 is reused as a polynomial accumulator right after).
v_mul_f32_e32 v51, v73, v82
v_mad_f32 v82, v77, v76, 1.0
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v81, v81, v76, 0x3f01e2bc
v_madak_f32_e32 v77, v77, v76, 0xb85ffb93
; v79 = 1/sqrt(v45).
v_rsq_f32_e32 v79, v45
v_mac_f32_e32 v82, v80, v81
v_mov_b32_e32 v81, 0x35c55945
v_madak_f32_e32 v81, v81, v76, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v76, 0xbc9ded90
s_mov_b32 m0, -1
; v74 = bit 30 of v19, converted to 0.0 / 1.0 below.
v_lshrrev_b32_e32 v74, 30, v19
v_madak_f32_e32 v81, v81, v76, 0x3d8eaf3b
v_madak_f32_e32 v83, v77, v76, 0xbf409397
; 64-bit LDS read at v56 + 384 bytes -> v76, v77 (overwrites the v76 polynomial variable).
ds_read_b64 v[76:77], v56 offset:384
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mac_f32_e32 v83, v80, v81
; v80 = v79^2, v81 = v74 * v80^2, v84 = v74 * v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v84, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v85, v84, v77
v_mad_f32 v81, v81, v80, s23
v_mad_f32 v85, v76, v67, -v85
v_mad_f32 v86, v84, v84, s26
v_mul_f32_e32 v76, v67, v76
; 0xbe2aaaab is about -1/6 and 0x3daaaaaa about 1/12.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v86, v77
v_mul_f32_e32 v76, v76, v81
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1 / v82 (denominator polynomial built above).
v_rcp_f32_e32 v77, v82
v_mul_f32_e32 v45, s18, v45
; v8 += v74 * v76.
v_mac_f32_e32 v8, v74, v76
; v45 = s18 * v45 * rsq(v45): argument x of the function evaluated below.
v_mul_f32_e32 v45, v79, v45
v_mul_f32_e32 v77, v37, v77
v_mul_f32_e32 v76, v74, v80
v_mul_f32_e32 v77, v83, v77
v_mac_f32_e32 v77, v79, v76
; NOTE(review): s27, s50 and s51 are bit masks set outside this chunk; presumably
; abs-value, low-bit truncation and sign masks respectively -- confirm.
v_and_b32_e32 v76, s27, v45
; s[4:5] = (v76 < 1.25).
v_mov_b32_e32 v81, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v81, v76
v_mul_f32_e32 v81, v76, v76
v_rcp_f32_e32 v82, v81
v_add_f32_e32 v83, -1.0, v76
v_mov_b32_e32 v86, 0xbd777f97
; 0x6f800000 = 2^96: threshold used by the scaled reciprocal sequences below.
v_mov_b32_e32 v87, 0x6f800000
v_cndmask_b32_e64 v82, v82, v83, s[4:5]
; s[8:9] = (v76 < 0.84375).
v_mov_b32_e32 v83, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v83, v76
; Polynomial variable v81: v76^2, v76 - 1 or 1/v76^2 depending on the range of v76.
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
v_mov_b32_e32 v83, 0xc11d077e
; vcc = (v76 < 0x4036db6e, about 2.857).
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v83, v83, v81, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v82, v76
; Numerator (v82) and denominator (v83) polynomials for every range are evaluated
; unconditionally and picked with v_cndmask.
v_mov_b32_e32 v82, 0xc3f1c275
v_madak_f32_e32 v82, v82, v81, 0xc480230b
v_madak_f32_e32 v83, v81, v83, 0xc3389ae7
v_madak_f32_e32 v82, v81, v82, 0xc41f6441
v_madak_f32_e32 v83, v81, v83, 0xc322658c
v_madak_f32_e32 v82, v81, v82, 0xc320a2ea
v_madak_f32_e32 v83, v81, v83, 0xc2798057
v_madak_f32_e32 v82, v81, v82, 0xc18e104b
v_madak_f32_e32 v83, v81, v83, 0xc128f022
v_madak_f32_e32 v82, v81, v82, 0xbf4c9dd4
v_madak_f32_e32 v83, v81, v83, 0xbf31a0b7
v_madak_f32_e32 v82, v81, v82, 0xbc21a092
v_madak_f32_e32 v83, v81, v83, 0xbc21a093
v_madak_f32_e32 v86, v86, v81, 0x40d23f7c
v_cndmask_b32_e32 v82, v82, v83, vcc
v_mov_b32_e32 v83, 0xc1b38712
v_madak_f32_e32 v83, v83, v81, 0x43ed43a7
v_madak_f32_e32 v86, v81, v86, 0x42d9451f
v_madak_f32_e32 v83, v81, v83, 0x451f90ce
v_madak_f32_e32 v86, v81, v86, 0x43d6810b
v_madak_f32_e32 v83, v81, v83, 0x4547fdbb
v_madak_f32_e32 v86, v81, v86, 0x442158c9
v_madak_f32_e32 v83, v81, v83, 0x44c01759
v_madak_f32_e32 v86, v81, v86, 0x43d9486f
v_madak_f32_e32 v83, v81, v83, 0x43a2e571
v_madak_f32_e32 v86, v81, v86, 0x4309a863
v_madak_f32_e32 v83, v81, v83, 0x41f2b459
v_madak_f32_e32 v86, v81, v86, 0x419d35ce
v_cndmask_b32_e32 v83, v83, v86, vcc
v_mov_b32_e32 v86, 0xbb0df9c0
v_madak_f32_e32 v86, v86, v81, 0x3d1151b3
v_madak_f32_e32 v86, v81, v86, 0xbde31cc2
v_madak_f32_e32 v86, v81, v86, 0x3ea2fe54
v_madak_f32_e32 v86, v81, v86, 0xbebe9208
v_madak_f32_e32 v86, v81, v86, 0x3ed46805
v_madak_f32_e32 v86, v81, v86, 0xbb1acdc6
v_cndmask_b32_e64 v82, v82, v86, s[4:5]
v_mov_b32_e32 v86, 0x3c445aa3
v_madak_f32_e32 v86, v86, v81, 0x3c5f6e13
v_madak_f32_e32 v86, v81, v86, 0x3e013307
v_madak_f32_e32 v86, v81, v86, 0x3d931ae7
v_madak_f32_e32 v86, v81, v86, 0x3f0a5785
v_madak_f32_e32 v86, v81, v86, 0x3dd9f331
v_cndmask_b32_e64 v83, v83, v86, s[4:5]
v_mov_b32_e32 v86, 0xb684e21a
v_madak_f32_e32 v86, v86, v81, 0x390aee49
v_madak_f32_e32 v86, v81, v86, 0x3ba68116
v_madak_f32_e32 v86, v81, v86, 0x3d852a63
v_madak_f32_e32 v86, v81, v86, 0x3ecbbbce
v_cndmask_b32_e64 v83, v83, v86, s[8:9]
; Denominator v83 = 1 + v81 * poly.
v_mad_f32 v83, v81, v83, 1.0
v_and_b32_e32 v90, s50, v45
; v91 = -v90^2 - 0.5625: first exponential argument.
v_mov_b32_e32 v91, 0xbf100000
; Scaled division: when |den| > 2^96 the denominator is pre-multiplied by 2^-32
; (0x2f800000) and the quotient is multiplied by the same factor afterwards.
v_cmp_gt_f32_e64 vcc, |v83|, v87
v_mov_b32_e32 v88, 0x2f800000
v_mad_f32 v91, v90, -v90, v91
v_cndmask_b32_e32 v89, 1.0, v88, vcc
; First exponential: n = int(v91 * log2(e) +/- 0.5), with the sign of the 0.5 following v91.
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mov_b32_e32 v93, 0x3fb8aa3b
v_mac_f32_e32 v92, v93, v91
v_mov_b32_e32 v86, 0xb7c756b1
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v86, v86, v81, 0xbbbd1489
v_madak_f32_e32 v86, v81, v86, 0xbce9528f
v_madak_f32_e32 v86, v81, v86, 0xbea66beb
v_mul_f32_e32 v83, v89, v83
v_madak_f32_e32 v81, v81, v86, 0x3e0375d4
v_rcp_f32_e32 v83, v83
v_cvt_f32_i32_e32 v86, v92
v_cndmask_b32_e64 v81, v82, v81, s[8:9]
; Range reduction v91 - n*ln2, with ln2 split into 0xbf317180 and 0xb717f7d1 (both negated).
v_mov_b32_e32 v82, 0xbf317180
; v81 = numerator / (scaled) denominator; the scale factor v89 is applied at the uses.
v_mul_f32_e32 v81, v83, v81
v_mad_f32 v83, v82, v86, v91
v_mov_b32_e32 v94, 0xb717f7d1
v_mad_f32 v95, v94, v86, v83
v_mul_f32_e32 v96, v95, v95
; The polynomial coefficients stay in v97/v98/v100/v101/v102 and are consumed
; (overwritten in place by v_mac) by the second exponential below.
v_mov_b32_e32 v97, 0xb5ddea0e
v_mov_b32_e32 v98, 0x3331bb4c
v_mad_f32 v99, v98, v96, v97
v_mov_b32_e32 v100, 0x388ab355
v_mad_f32 v99, v99, v96, v100
v_mov_b32_e32 v101, 0xbb360b61
v_mad_f32 v99, v99, v96, v101
v_mov_b32_e32 v102, 0x3e2aaaab
v_mad_f32 v99, v99, v96, v102
v_mad_f32 v96, -v96, v99, v95
v_mul_f32_e32 v95, v96, v95
v_sub_f32_e32 v96, 2.0, v96
v_cmp_gt_f32_e64 vcc, |v96|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v96, v99, v96
v_rcp_f32_e32 v96, v96
v_mul_f32_e32 v95, v96, v95
v_mul_f32_e32 v95, v95, v99
v_mad_f32 v86, -v86, v94, -v95
v_subrev_f32_e32 v83, v83, v86
; Adding n << 23 to the float bit pattern multiplies the value by 2^n.
v_lshlrev_b32_e32 v86, 23, v92
v_sub_f32_e32 v83, 1.0, v83
v_add_i32_e32 v83, vcc, v83, v86
; Range guards: 0 below 0xc2aeac4f (about -87.3), +inf (0x7f800000) at or above
; 0x42b17218 (about 88.7), and a NaN argument is passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v91, v86
v_mov_b32_e32 v92, 0x42b17218
v_cndmask_b32_e32 v83, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v91, v92
v_mov_b32_e32 v95, 0x7f800000
v_cndmask_b32_e32 v83, v95, v83, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v83, v83, v91, vcc
; Second exponential argument: v90 = (v90 + v76) * (v90 - v76) + v81 * v89.
v_subrev_f32_e32 v91, v76, v90
v_mul_f32_e32 v96, v81, v89
v_add_f32_e32 v90, v76, v90
v_mad_f32 v90, v90, v91, v96
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mac_f32_e32 v91, v93, v90
v_cvt_i32_f32_e32 v91, v91
; v81 = v89 * v81 + 0x3f58560b (about 0.845): result for the middle range.
v_madak_f32_e32 v81, v89, v81, 0x3f58560b
v_cvt_f32_i32_e32 v93, v91
v_lshlrev_b32_e32 v91, 23, v91
v_mad_f32 v82, v82, v93, v90
v_mad_f32 v99, v94, v93, v82
v_mul_f32_e32 v103, v99, v99
v_mac_f32_e32 v97, v98, v103
v_mac_f32_e32 v100, v97, v103
v_mac_f32_e32 v101, v100, v103
v_mac_f32_e32 v102, v101, v103
v_mad_f32 v97, -v103, v102, v99
v_mul_f32_e32 v98, v97, v99
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v87
v_cndmask_b32_e32 v99, 1.0, v88, vcc
v_mul_f32_e32 v97, v99, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v97, v97, v98
v_mul_f32_e32 v97, v97, v99
v_mad_f32 v93, -v93, v94, -v97
v_subrev_f32_e32 v82, v82, v93
v_sub_f32_e32 v82, 1.0, v82
v_add_i32_e32 v82, vcc, v82, v91
v_cmp_ge_f32_e32 vcc, v90, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v90, v92
v_cndmask_b32_e32 v82, v95, v82, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_cndmask_b32_e32 v82, v82, v90, vcc
; Product of the two exponentials.
v_mul_f32_e32 v82, v82, v83
; s[10:11] = (v76 < 6.0); s[12:13] = (v76 < 2^-28).
v_mov_b32_e32 v83, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v83, v76
v_mov_b32_e32 v83, 0x31800000
v_cmp_gt_f32_e64 vcc, |v76|, v87
v_cmp_gt_f32_e64 s[12:13], v83, v76
v_cndmask_b32_e32 v83, 1.0, v88, vcc
v_mul_f32_e32 v76, v83, v76
v_rcp_f32_e32 v76, v76
; vcc = NaN check on the argument v45, consumed by the final select.
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v76, v76, v82
; Large-argument result: 1 - exp_product / v76, replaced by exactly 1.0 when v76 >= 6.
v_mad_f32 v76, -v83, v76, 1.0
v_cndmask_b32_e64 v76, 1.0, v76, s[10:11]
v_cndmask_b32_e64 v76, v76, v81, s[4:5]
; OR in the bits of v45 selected by s51 (presumably the sign bit).
v_and_b32_e32 v81, s51, v45
v_or_b32_e32 v76, v81, v76
; Small-argument result: v45 + v45 * (num/den).
v_mad_f32 v81, v96, v45, v45
v_cndmask_b32_e64 v76, v76, v81, s[8:9]
; Tiny-argument result: 0.125 * (8 * v45 + 0x3f8375d4 * v45).
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v76, v76, v81, s[12:13]
; NaN argument is returned unchanged.
v_cndmask_b32_e32 v45, v76, v45, vcc
; v45 = v74 - f(x); then v45 = v79 * v45 - s19 * v74.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v80, v84
; v5 += v45 * v51.
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v85, v74
; v45 = v51*v77 - v85*v74 and v51 = v74*v85 - v77*v51: the same scalar with opposite signs.
v_mad_f32 v45, v51, v77, -v45
v_mul_f32_e32 v51, v77, v51
v_mad_f32 v51, v74, v85, -v51
; Scale the difference vector (v27, v31, v35) by each scalar and accumulate into
; v69/v75/v78 and v16/v17/v18.
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v18, v35, v45, v18
v_mad_f32 v17, v31, v45, v17
v_mac_f32_e32 v16, v27, v45
BB6_162: ; %Flow1214
; in Loop: Header=BB6_11 Depth=1
; Join point: re-enable the lanes parked in s[38:39] by BB6_160.
s_or_b64 exec, exec, s[38:39]
BB6_163: ; in Loop: Header=BB6_11 Depth=1
; Join point for s[34:35], then a per-lane sign test of v65 (signed 0 > v65, i.e. bit 31 set).
s_or_b64 exec, exec, s[34:35]
v_cmp_gt_i32_e32 vcc, 0, v65
; s[4:5] = exec before narrowing, exec &= vcc.
s_and_saveexec_b64 s[4:5], vcc
; This level parks its lanes in s[12:13]; they are OR-ed back into exec at BB6_167.
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB6_167
s_cbranch_execz BB6_167
BB6_164: ; in Loop: Header=BB6_11 Depth=1
; Same shape as BB6_156/BB6_160 for the LDS entry at offsets 112/113 and index
; register v38, but with a different register assignment: the differences land in
; v23/v27/v31 and the sum of squares in v35.
; NOTE(review): m0 = -1 presumably disables the LDS address clamp for the DS read -- confirm for this target.
s_mov_b32 m0, -1
; Two 64-bit reads at v55 + 112*8 and v55 + 113*8 (offsets are in 8-byte units) -> v79..v82.
ds_read2_b64 v[79:82], v55 offset0:112 offset1:113
; Build the lane mask s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v38 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v38, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before using v79..v81.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v27 = v80 - v71.
v_subrev_f32_e32 v27, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v23 = v79 - v70; this overwrites the v23 value compared against v38 above.
v_subrev_f32_e32 v23, v70, v79
v_mul_f32_e32 v35, v27, v27
; v45 = 1.0 where the mask is set, else 0 (so masked-off lanes get a limit of 0).
v_cndmask_b32_e64 v45, 0, 1.0, s[4:5]
; v31 = v81 - v72.
v_subrev_f32_e32 v31, v72, v81
; v35 = v27^2 + v23^2 + v31^2.
v_mac_f32_e32 v35, v23, v23
v_mac_f32_e32 v35, v31, v31
; Limit = s22 * v45; lanes pass when v35 < limit.
v_mul_f32_e32 v45, s22, v45
v_cmp_lt_f32_e32 vcc, v35, v45
; Narrow exec to the passing lanes; s[34:35] = lanes to restore at BB6_166.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB6_166
s_cbranch_execz BB6_166
BB6_165: ; in Loop: Header=BB6_11 Depth=1
; Straight-line body run by the lanes that passed the v35 < limit test in BB6_164.
; Inputs produced in BB6_164: v23/v27/v31 (coordinate differences), v35 (their sum
; of squares) and v82 (4th dword of the LDS read).
; Accumulates into v8, v5, v69/v75/v78 and v13/v14/v15.
; Overwrites v19, v65, v67, v68 and v70..v73 along the way.
; NOTE(review): the constant tables below match the fdlibm-style single-precision
; erf() and exp() expansions, so this is presumably an inlined erf()/exp() pair
; feeding a pair-interaction term -- confirm against the kernel source.
; vcc = sign bit of v19, captured before v19 is overwritten on the next line.
v_cmp_gt_i32_e32 vcc, 0, v19
; v19 = max(v35, 0x34cd15ae), a lower clamp (about 3.8e-7) before the rsq.
v_max_f32_e32 v19, 0x34cd15ae, v35
; v35 = 1/sqrt(v19).
v_rsq_f32_e32 v35, v19
s_mov_b32 m0, -1
; v51 = 1.0 where the sign bit of the old v19 was set, else 0.
v_cndmask_b32_e64 v51, 0, 1.0, vcc
v_mul_f32_e32 v45, v73, v82
; v65 = v35^2, v72 = v51 * v65^2.
v_mul_f32_e32 v65, v35, v35
v_mul_f32_e32 v70, v65, v65
v_mul_f32_e32 v72, v51, v70
; 64-bit LDS read at v56 + 448 bytes -> v70, v71.
ds_read_b64 v[70:71], v56 offset:448
v_mov_b32_e32 v76, 0x3a92b707
v_mov_b32_e32 v77, 0x3c739487
v_mov_b32_e32 v79, 0x35c55945
; 0x3fb8aa3b is log2(e), kept in v81 for both exponentials below.
v_mov_b32_e32 v81, 0x3fb8aa3b
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v68, v68, v71
; v71 = v51 * v65^3.
v_mul_f32_e32 v71, v65, v72
v_mul_f32_e32 v73, v71, v68
v_mad_f32 v73, v70, v67, -v73
v_mul_f32_e32 v67, v67, v70
; v70 = v9 * v19, v74 = v70^2; two polynomial pairs in v74/v70 are built in v76 and v74.
v_mul_f32_e32 v70, v9, v19
v_mul_f32_e32 v74, v70, v70
v_madak_f32_e32 v76, v76, v74, 0x3ded3cb2
v_madak_f32_e32 v77, v77, v74, 0x3f01e2bc
v_mad_f32 v76, v76, v74, 1.0
v_mac_f32_e32 v76, v70, v77
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v77, v77, v74, 0xb85ffb93
v_madak_f32_e32 v79, v79, v74, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v74, 0xbc9ded90
v_madak_f32_e32 v79, v79, v74, 0x3d8eaf3b
v_madak_f32_e32 v74, v77, v74, 0xbf409397
v_mac_f32_e32 v74, v70, v79
v_mad_f32 v70, v71, v71, s26
v_mul_f32_e32 v68, v70, v68
v_mad_f32 v70, v72, v65, s23
; 0xbe2aaaab is about -1/6 and 0x3daaaaaa about 1/12.
v_mul_f32_e32 v70, 0xbe2aaaab, v70
v_mul_f32_e32 v67, v67, v70
v_mac_f32_e32 v67, 0x3daaaaaa, v68
; v8 += v51 * v67.
v_mac_f32_e32 v8, v51, v67
; v67 = 1 / v76 (denominator polynomial built above).
v_rcp_f32_e32 v67, v76
; v19 = s18 * v19 * rsq(v19): argument x of the function evaluated below.
v_mul_f32_e32 v19, s18, v19
v_mul_f32_e32 v19, v35, v19
v_mul_f32_e32 v68, v51, v65
v_mul_f32_e32 v67, v37, v67
v_mul_f32_e32 v67, v74, v67
v_mac_f32_e32 v67, v35, v68
; NOTE(review): s27, s50 and s51 are bit masks set outside this chunk; presumably
; abs-value, low-bit truncation and sign masks respectively -- confirm.
v_and_b32_e32 v68, s27, v19
; s[4:5] = (v68 < 1.25).
v_mov_b32_e32 v70, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v70, v68
v_mul_f32_e32 v70, v68, v68
v_rcp_f32_e32 v72, v70
v_add_f32_e32 v74, -1.0, v68
v_mov_b32_e32 v76, 0xbd777f97
v_and_b32_e32 v77, s50, v19
v_cndmask_b32_e64 v72, v72, v74, s[4:5]
; s[8:9] = (v68 < 0.84375).
v_mov_b32_e32 v74, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v74, v68
; Polynomial variable v70: v68^2, v68 - 1 or 1/v68^2 depending on the range of v68.
v_cndmask_b32_e64 v70, v72, v70, s[8:9]
v_mov_b32_e32 v74, 0xc11d077e
; vcc = (v68 < 0x4036db6e, about 2.857).
v_mov_b32_e32 v72, 0x4036db6e
v_madak_f32_e32 v74, v74, v70, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v72, v68
; Numerator (v72) and denominator (v74) polynomials for every range are evaluated
; unconditionally and picked with v_cndmask; the exponential code is interleaved.
v_mov_b32_e32 v72, 0xc3f1c275
v_madak_f32_e32 v72, v72, v70, 0xc480230b
v_madak_f32_e32 v74, v70, v74, 0xc3389ae7
v_madak_f32_e32 v72, v70, v72, 0xc41f6441
v_madak_f32_e32 v74, v70, v74, 0xc322658c
v_madak_f32_e32 v72, v70, v72, 0xc320a2ea
v_madak_f32_e32 v74, v70, v74, 0xc2798057
v_madak_f32_e32 v72, v70, v72, 0xc18e104b
v_madak_f32_e32 v74, v70, v74, 0xc128f022
v_madak_f32_e32 v72, v70, v72, 0xbf4c9dd4
v_madak_f32_e32 v74, v70, v74, 0xbf31a0b7
v_madak_f32_e32 v72, v70, v72, 0xbc21a092
v_madak_f32_e32 v74, v70, v74, 0xbc21a093
v_madak_f32_e32 v76, v76, v70, 0x40d23f7c
v_cndmask_b32_e32 v72, v72, v74, vcc
v_mov_b32_e32 v74, 0xc1b38712
v_madak_f32_e32 v74, v74, v70, 0x43ed43a7
v_madak_f32_e32 v76, v70, v76, 0x42d9451f
v_madak_f32_e32 v74, v70, v74, 0x451f90ce
v_madak_f32_e32 v76, v70, v76, 0x43d6810b
v_madak_f32_e32 v74, v70, v74, 0x4547fdbb
v_madak_f32_e32 v76, v70, v76, 0x442158c9
v_madak_f32_e32 v74, v70, v74, 0x44c01759
v_madak_f32_e32 v76, v70, v76, 0x43d9486f
v_madak_f32_e32 v74, v70, v74, 0x43a2e571
v_madak_f32_e32 v76, v70, v76, 0x4309a863
v_madak_f32_e32 v74, v70, v74, 0x41f2b459
v_madak_f32_e32 v76, v70, v76, 0x419d35ce
v_cndmask_b32_e32 v74, v74, v76, vcc
v_mov_b32_e32 v76, 0xbb0df9c0
v_madak_f32_e32 v76, v76, v70, 0x3d1151b3
v_madak_f32_e32 v76, v70, v76, 0xbde31cc2
v_madak_f32_e32 v76, v70, v76, 0x3ea2fe54
v_madak_f32_e32 v76, v70, v76, 0xbebe9208
; v79 = -v77^2 - 0.5625: first exponential argument.
v_mov_b32_e32 v79, 0xbf100000
v_madak_f32_e32 v76, v70, v76, 0x3ed46805
v_mad_f32 v79, v77, -v77, v79
v_madak_f32_e32 v76, v70, v76, 0xbb1acdc6
; First exponential: n = int(v79 * log2(e) +/- 0.5), with the sign of the 0.5 following v79.
v_cmp_gt_f32_e64 s[10:11], 0, v79
v_cndmask_b32_e64 v80, 0.5, -0.5, s[10:11]
v_cndmask_b32_e64 v72, v72, v76, s[4:5]
v_mov_b32_e32 v76, 0x3c445aa3
v_madak_f32_e32 v76, v76, v70, 0x3c5f6e13
v_mac_f32_e32 v80, v81, v79
v_madak_f32_e32 v76, v70, v76, 0x3e013307
v_cvt_i32_f32_e32 v80, v80
v_madak_f32_e32 v76, v70, v76, 0x3d931ae7
v_madak_f32_e32 v76, v70, v76, 0x3f0a5785
v_madak_f32_e32 v76, v70, v76, 0x3dd9f331
v_cndmask_b32_e64 v74, v74, v76, s[4:5]
v_mov_b32_e32 v76, 0xb7c756b1
v_cvt_f32_i32_e32 v83, v80
v_madak_f32_e32 v76, v76, v70, 0xbbbd1489
v_madak_f32_e32 v76, v70, v76, 0xbce9528f
v_madak_f32_e32 v76, v70, v76, 0xbea66beb
; Range reduction v79 - n*ln2, with ln2 split into 0xbf317180 and 0xb717f7d1 (both negated).
v_mov_b32_e32 v84, 0xbf317180
v_madak_f32_e32 v76, v70, v76, 0x3e0375d4
v_mad_f32 v85, v84, v83, v79
v_mov_b32_e32 v86, 0xb717f7d1
v_cndmask_b32_e64 v72, v72, v76, s[8:9]
v_mov_b32_e32 v76, 0xb684e21a
v_mad_f32 v87, v86, v83, v85
v_madak_f32_e32 v76, v76, v70, 0x390aee49
v_mul_f32_e32 v88, v87, v87
; The polynomial coefficients stay in v89/v90/v92/v93/v94 and are consumed
; (overwritten in place by v_mac) by the second exponential below.
v_mov_b32_e32 v89, 0xb5ddea0e
v_mov_b32_e32 v90, 0x3331bb4c
v_madak_f32_e32 v76, v70, v76, 0x3ba68116
v_mad_f32 v91, v90, v88, v89
v_mov_b32_e32 v92, 0x388ab355
v_madak_f32_e32 v76, v70, v76, 0x3d852a63
v_mad_f32 v91, v91, v88, v92
v_mov_b32_e32 v93, 0xbb360b61
v_madak_f32_e32 v76, v70, v76, 0x3ecbbbce
v_mad_f32 v91, v91, v88, v93
v_mov_b32_e32 v94, 0x3e2aaaab
v_cndmask_b32_e64 v74, v74, v76, s[8:9]
v_mad_f32 v91, v91, v88, v94
v_mad_f32 v88, -v88, v91, v87
; Denominator v70 = 1 + v70 * poly.
v_mad_f32 v70, v70, v74, 1.0
; Scaled division: when |den| > 2^96 (0x6f800000) the denominator is pre-multiplied
; by 2^-32 (0x2f800000) and the quotient is multiplied by the same factor afterwards.
v_mov_b32_e32 v74, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v70|, v74
v_mov_b32_e32 v76, 0x2f800000
v_sub_f32_e32 v91, 2.0, v88
v_cndmask_b32_e32 v82, 1.0, v76, vcc
v_cmp_gt_f32_e64 vcc, |v91|, v74
v_cndmask_b32_e32 v95, 1.0, v76, vcc
v_mul_f32_e32 v91, v95, v91
v_rcp_f32_e32 v91, v91
v_mul_f32_e32 v70, v82, v70
v_mul_f32_e32 v87, v88, v87
v_rcp_f32_e32 v70, v70
v_mul_f32_e32 v87, v91, v87
v_mul_f32_e32 v87, v87, v95
v_mad_f32 v83, -v83, v86, -v87
v_subrev_f32_e32 v83, v85, v83
; v70 = numerator / (scaled) denominator; v72 = v70 * scale factor.
v_mul_f32_e32 v70, v70, v72
v_mul_f32_e32 v72, v70, v82
; Second exponential argument: v77 = (v77 + v68) * (v77 - v68) + v72.
v_subrev_f32_e32 v85, v68, v77
v_add_f32_e32 v77, v68, v77
v_sub_f32_e32 v83, 1.0, v83
; Adding n << 23 to the float bit pattern multiplies the value by 2^n.
v_lshlrev_b32_e32 v80, 23, v80
v_add_i32_e32 v80, vcc, v83, v80
v_mad_f32 v77, v77, v85, v72
v_cmp_gt_f32_e32 vcc, 0, v77
v_cndmask_b32_e64 v85, 0.5, -0.5, vcc
v_mac_f32_e32 v85, v81, v77
v_cvt_i32_f32_e32 v81, v85
; Range guards for the first exponential: 0 below 0xc2aeac4f (about -87.3), +inf
; (0x7f800000) at or above 0x42b17218 (about 88.7), NaN argument passed through.
v_mov_b32_e32 v83, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v79, v83
v_mov_b32_e32 v85, 0x42b17218
v_cvt_f32_i32_e32 v87, v81
v_cndmask_b32_e32 v80, 0, v80, vcc
v_cmp_lt_f32_e32 vcc, v79, v85
v_mov_b32_e32 v88, 0x7f800000
v_mad_f32 v84, v84, v87, v77
v_mad_f32 v91, v86, v87, v84
v_mul_f32_e32 v95, v91, v91
v_mac_f32_e32 v89, v90, v95
v_mac_f32_e32 v92, v89, v95
v_mac_f32_e32 v93, v92, v95
v_mac_f32_e32 v94, v93, v95
v_mad_f32 v89, -v95, v94, v91
v_sub_f32_e32 v90, 2.0, v89
v_cndmask_b32_e32 v80, v88, v80, vcc
v_cmp_gt_f32_e64 vcc, |v90|, v74
v_cndmask_b32_e32 v92, 1.0, v76, vcc
v_mul_f32_e32 v90, v92, v90
v_rcp_f32_e32 v90, v90
v_cmp_u_f32_e32 vcc, v79, v79
v_cndmask_b32_e32 v79, v80, v79, vcc
v_mul_f32_e32 v80, v89, v91
v_mul_f32_e32 v80, v90, v80
v_mul_f32_e32 v80, v80, v92
v_mad_f32 v80, -v87, v86, -v80
v_subrev_f32_e32 v80, v84, v80
v_sub_f32_e32 v80, 1.0, v80
v_lshlrev_b32_e32 v81, 23, v81
v_add_i32_e32 v80, vcc, v80, v81
; Same range guards for the second exponential.
v_cmp_ge_f32_e32 vcc, v77, v83
v_cndmask_b32_e32 v80, 0, v80, vcc
v_cmp_lt_f32_e64 s[10:11], v77, v85
v_cndmask_b32_e64 v80, v88, v80, s[10:11]
v_cmp_u_f32_e32 vcc, v77, v77
v_cndmask_b32_e32 v77, v80, v77, vcc
v_cmp_gt_f32_e64 vcc, |v68|, v74
v_cndmask_b32_e32 v74, 1.0, v76, vcc
v_mul_f32_e32 v76, v74, v68
v_rcp_f32_e32 v76, v76
; Product of the two exponentials.
v_mul_f32_e32 v77, v77, v79
; vcc = (v68 < 6.0); s[10:11] = (v68 < 2^-28).
v_mov_b32_e32 v79, 0x40c00000
v_cmp_gt_f32_e32 vcc, v79, v68
v_mov_b32_e32 v79, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v79, v68
; Large-argument result: 1 - exp_product / v68, replaced by exactly 1.0 when v68 >= 6.
v_mul_f32_e32 v68, v76, v77
v_mad_f32 v68, -v74, v68, 1.0
v_cndmask_b32_e32 v68, 1.0, v68, vcc
; v70 = v82 * v70 + 0x3f58560b (about 0.845): result for the middle range.
v_madak_f32_e32 v70, v82, v70, 0x3f58560b
v_cndmask_b32_e64 v68, v68, v70, s[4:5]
; OR in the bits of v19 selected by s51 (presumably the sign bit).
v_and_b32_e32 v70, s51, v19
v_or_b32_e32 v68, v70, v68
; Small-argument result: v19 + v19 * (num/den).
v_mad_f32 v70, v72, v19, v19
v_cndmask_b32_e64 v68, v68, v70, s[8:9]
; Tiny-argument result: 0.125 * (8 * v19 + 0x3f8375d4 * v19).
v_mul_f32_e32 v70, 0x3f8375d4, v19
v_mac_f32_e32 v70, 0x41000000, v19
v_mul_f32_e32 v70, 0x3e000000, v70
v_cndmask_b32_e64 v68, v68, v70, s[10:11]
; NaN argument is returned unchanged.
v_cmp_u_f32_e32 vcc, v19, v19
v_cndmask_b32_e32 v19, v68, v19, vcc
; v19 = v51 - f(x); then v19 = v35 * v19 - s19 * v51.
v_subrev_f32_e32 v19, v19, v51
v_mul_f32_e32 v51, s19, v51
v_mad_f32 v19, v35, v19, -v51
v_mul_f32_e32 v35, v65, v71
; v5 += v19 * v45.
v_mac_f32_e32 v5, v19, v45
v_mul_f32_e32 v19, v73, v35
; v19 = v45*v67 - v73*v35 and v35 = v35*v73 - v67*v45: the same scalar with opposite signs.
v_mad_f32 v19, v45, v67, -v19
v_mul_f32_e32 v45, v67, v45
v_mad_f32 v35, v35, v73, -v45
; Scale the difference vector (v23, v27, v31) by each scalar and accumulate into
; v69/v75/v78 and v13/v14/v15.
v_mad_f32 v78, v31, v35, v78
v_mad_f32 v75, v27, v35, v75
v_mac_f32_e32 v69, v23, v35
v_mad_f32 v15, v31, v19, v15
v_mad_f32 v14, v27, v19, v14
v_mac_f32_e32 v13, v23, v19
BB6_166: ; %Flow1213
; in Loop: Header=BB6_11 Depth=1
; Join point: re-enable the lanes parked in s[34:35] by BB6_164.
s_or_b64 exec, exec, s[34:35]
BB6_167: ; in Loop: Header=BB6_11 Depth=1
; Join point for s[12:13]; then store the three accumulators v69/v75/v78 to LDS at
; the addresses in v6/v7/v12 and continue only with lanes where v2 < 3.
s_or_b64 exec, exec, s[12:13]
s_mov_b32 m0, -1
; vcc = (3 > v2), signed compare.
v_cmp_gt_i32_e32 vcc, 3, v2
ds_write_b32 v6, v69
ds_write_b32 v7, v75
ds_write_b32 v12, v78
; exec &= vcc; s[4:5] ends up holding the lanes to restore at BB6_173.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; Wait for the LDS writes to complete before the reads in BB6_168.
s_waitcnt lgkmcnt(0)
; mask branch BB6_173
s_cbranch_execz BB6_173
BB6_168: ; in Loop: Header=BB6_11 Depth=1
; Start of an LDS reduction: read the first dword of a run into v19.
; v23 = v2 * 64; byte address v27 = s15 + 4 * (v11 + v23).
v_lshlrev_b32_e32 v23, 6, v2
v_add_i32_e32 v19, vcc, v11, v23
v_lshlrev_b32_e32 v19, 2, v19
v_add_i32_e32 v27, vcc, s15, v19
s_mov_b32 m0, -1
ds_read_b32 v19, v27
; Guard for the rest of the run: lanes continue when (v11 | 1) < (v11 + 8), signed.
v_add_i32_e32 v31, vcc, 8, v11
v_or_b32_e32 v35, 1, v11
v_cmp_lt_i32_e32 vcc, v35, v31
; exec &= vcc; s[8:9] ends up holding the lanes to restore at BB6_170.
s_and_saveexec_b64 s[8:9], vcc
s_xor_b64 s[8:9], exec, s[8:9]
; Wait for the v19 read issued above.
s_waitcnt lgkmcnt(0)
; mask branch BB6_170
s_cbranch_execz BB6_170
BB6_169: ; in Loop: Header=BB6_11 Depth=1
; Adds seven more LDS dwords to the partial sum in v19.
s_mov_b32 m0, -1
; Dwords at v27 + 4 and v27 + 8 (ds_read2_b32 offsets are in 4-byte units).
ds_read2_b32 v[67:68], v27 offset0:1 offset1:2
; Second base address: v23 = s15 + 4 * ((v11 | 3) + v23).
v_or_b32_e32 v31, 3, v11
v_add_i32_e32 v23, vcc, v31, v23
v_lshlrev_b32_e32 v23, 2, v23
; Dwords at v27 + 12 and v27 + 16.
ds_read2_b32 v[69:70], v27 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v19, v67
v_add_i32_e32 v23, vcc, s15, v23
v_add_f32_e32 v19, v68, v19
; Dwords at v23 + 8 and v23 + 12, plus the dword at v27 + 28 bytes.
ds_read2_b32 v[67:68], v23 offset0:2 offset1:3
ds_read_b32 v27, v27 offset:28
v_add_f32_e32 v19, v69, v19
v_add_f32_e32 v19, v70, v19
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v19, v67, v19
v_add_f32_e32 v19, v68, v19
v_add_f32_e32 v19, v27, v19
BB6_170: ; %._crit_edge.i
; in Loop: Header=BB6_11 Depth=1
; Re-enable the lanes parked in s[8:9], then compute the address of element
; (v66 * 3 + v2) of a dword array and load its current value for the
; compare-and-swap loop that follows at BB6_171.
s_or_b64 exec, exec, s[8:9]
v_mul_lo_i32 v23, v66, 3
v_mov_b32_e32 v27, s29
; Buffer descriptor for the load: s[8:9] = s[28:29], s[10:11] = s[46:47].
s_mov_b64 s[8:9], s[28:29]
s_mov_b64 s[10:11], s[46:47]
v_add_i32_e32 v65, vcc, v23, v2
; Sign-extend the index to 64 bits and scale it by 4 -> byte offset in v[67:68].
v_ashrrev_i32_e32 v66, 31, v65
v_lshl_b64 v[67:68], v[65:66], 2
; v[65:66] = s[28:29] + byte offset (64-bit add with carry through vcc).
v_add_i32_e32 v65, vcc, s28, v67
v_addc_u32_e32 v66, vcc, v68, v27, vcc
; Initial value for the CAS loop, loaded into v68 (overwrites the high offset dword).
buffer_load_dword v68, v[67:68], s[8:11], 0 addr64
; s[8:9] now becomes the accumulated "lanes done" mask of the loop.
s_mov_b64 s[8:9], 0
s_waitcnt vmcnt(0)
BB6_171: ; Parent Loop BB6_11 Depth=1
; => This Inner Loop Header: Depth=2
; Floating-point atomic add built from a compare-and-swap retry loop:
; v68 = value last observed in memory, v19 = amount to add, v[65:66] = address.
; v67 = proposed new value.
v_add_f32_e32 v67, v19, v68
; Operand pair for the cmpswap: v69 = new value, v70 = expected old value.
v_mov_b32_e32 v70, v68
v_mov_b32_e32 v69, v67
; glc makes the instruction return the pre-operation memory value in v69.
buffer_atomic_cmpswap v[69:70], v[65:66], s[44:47], 0 addr64 glc
; NOTE(review): v23 is written twice back to back, so the -1 is overwritten before
; any read in this block; this looks like a code-generation artifact.
v_mov_b32_e32 v23, -1
v_mov_b32_e32 v23, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; A lane is done when the returned value equals the value it expected.
v_cmp_eq_u32_e32 vcc, v69, v68
s_or_b64 s[8:9], vcc, s[8:9]
; Lanes that lost the race retry with the freshly observed value.
v_mov_b32_e32 v68, v69
; Drop finished lanes from exec; loop while any lane is left.
s_andn2_b64 exec, exec, s[8:9]
s_cbranch_execnz BB6_171
; BB#172: ; %Flow1211
; in Loop: Header=BB6_11 Depth=1
; Chain of join points: each s_or_b64 re-enables the lanes parked in the named
; SGPR pair by an enclosing masked region.
s_or_b64 exec, exec, s[8:9]
BB6_173: ; %Flow1212
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[4:5]
BB6_174: ; %Flow1221
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[30:31]
BB6_175: ; %Flow1255
; in Loop: Header=BB6_11 Depth=1
s_or_b64 exec, exec, s[52:53]
; Loop latch of BB6_11: 64-bit increment of the counter held in v[57:58].
v_add_i32_e32 v57, vcc, 1, v57
v_addc_u32_e32 v58, vcc, 0, v58, vcc
; Continue while the low dword differs from v39 in at least one active lane.
v_cmp_ne_u32_e32 vcc, v57, v39
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_11
BB6_176: ; %Flow1257
s_mov_b32 m0, -1
ds_write_b32 v6, v48
ds_write_b32 v7, v49
ds_write_b32 v12, v50
s_waitcnt lgkmcnt(0)
s_barrier
s_load_dword s2, s[6:7], 0x32
s_and_b64 s[0:1], exec, s[0:1]
v_lshlrev_b32_e32 v10, 2, v40
s_xor_b64 s[0:1], s[0:1], -1
v_mov_b32_e32 v3, 0
s_waitcnt lgkmcnt(0)
v_cmp_ne_u32_e64 s[2:3], s2, 0
s_and_b64 s[2:3], s[2:3], s[0:1]
v_lshlrev_b32_e32 v9, 6, v36
v_add_i32_e32 v10, vcc, s15, v10
v_add_i32_e32 v23, vcc, 64, v2
v_add_i32_e32 v19, vcc, 0x80, v2
v_cmp_gt_i32_e64 s[0:1], 4, v1
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_187
s_cbranch_execz BB6_187
BB6_177:
s_mov_b32 m0, -1
ds_read_b32 v3, v10 offset:128
ds_read_b32 v27, v10
v_add_i32_e32 v31, vcc, v11, v23
v_lshlrev_b32_e32 v31, 2, v31
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v27
ds_write_b32 v10, v3
v_add_i32_e32 v27, vcc, s15, v31
s_waitcnt lgkmcnt(0)
ds_read_b32 v3, v27 offset:128
ds_read_b32 v31, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v31
ds_write_b32 v10, v3 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v3, vcc, v11, v19
v_lshlrev_b32_e32 v3, 2, v3
v_add_i32_e32 v31, vcc, s15, v3
ds_read_b32 v3, v31 offset:128
ds_read_b32 v35, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v35
v_mov_b32_e32 v35, 0
ds_write_b32 v10, v3 offset:512
s_waitcnt lgkmcnt(0)
; implicit-def: %VGPR3
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_179
BB6_178:
v_cmp_eq_u32_e32 vcc, 2, v1
v_mov_b32_e32 v3, 0
v_cndmask_b32_e64 v35, 0, -1, vcc
BB6_179: ; %Flow1208
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_181
s_cbranch_execz BB6_181
BB6_180: ; %.thread85.i
s_mov_b32 m0, -1
ds_read_b32 v35, v10 offset:64
ds_read_b32 v36, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v35, v35, v36
ds_write_b32 v10, v35
ds_read_b32 v27, v27 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v35, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v35
ds_write_b32 v10, v27 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v27, v31 offset:64
ds_read_b32 v31, v10 offset:512
v_mov_b32_e32 v35, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v31
ds_write_b32 v10, v27 offset:512
s_waitcnt lgkmcnt(0)
BB6_181: ; %Flow1209
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v35
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_186
s_cbranch_execz BB6_186
BB6_182:
v_add_i32_e32 v3, vcc, v9, v2
v_mul_lo_i32 v3, v3, 3
v_mov_b32_e32 v27, 0xe0
v_mad_i32_i24 v27, v27, v1, v10
s_mov_b32 m0, -1
v_add_i32_e32 v35, vcc, v1, v3
v_ashrrev_i32_e32 v36, 31, v35
v_lshl_b64 v[37:38], v[35:36], 2
v_add_i32_e32 v35, vcc, s28, v37
v_mov_b32_e32 v3, s29
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_mov_b64 s[8:9], s[28:29]
ds_read_b32 v31, v27
ds_read_b32 v27, v27 offset:32
v_addc_u32_e32 v36, vcc, v38, v3, vcc
buffer_load_dword v38, v[37:38], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v31, v27
s_waitcnt vmcnt(0)
BB6_183: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v37, v27, v38
v_mov_b32_e32 v40, v38
v_mov_b32_e32 v39, v37
buffer_atomic_cmpswap v[39:40], v[35:36], s[8:11], 0 addr64 glc
v_mov_b32_e32 v3, -1
v_mov_b32_e32 v3, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v39, v38
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v38, v39
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_183
; BB#184: ; %atomicAdd_g_f.exit.i
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v31
v_mov_b32_e32 v3, 0
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_186
; BB#185:
v_mov_b32_e32 v3, v27
BB6_186: ; %Flow1210
s_or_b64 exec, exec, s[6:7]
BB6_187: ; %reduce_force_i_pow2.exit
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v42
ds_write_b32 v7, v43
ds_write_b32 v12, v44
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_198
s_cbranch_execz BB6_198
BB6_188:
s_mov_b32 m0, -1
ds_read_b32 v27, v10 offset:128
ds_read_b32 v31, v10
v_add_i32_e32 v35, vcc, v11, v23
v_lshlrev_b32_e32 v35, 2, v35
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v31
ds_write_b32 v10, v27
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v27, vcc, s15, v35
ds_read_b32 v31, v27 offset:128
ds_read_b32 v35, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v31, v31, v35
ds_write_b32 v10, v31 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v31, vcc, v11, v19
v_lshlrev_b32_e32 v31, 2, v31
v_add_i32_e32 v31, vcc, s15, v31
ds_read_b32 v35, v31 offset:128
ds_read_b32 v36, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v35, v35, v36
ds_write_b32 v10, v35 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v35, 0
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_190
BB6_189:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v35, 0, -1, vcc
BB6_190: ; %Flow1205
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_192
s_cbranch_execz BB6_192
BB6_191: ; %.thread85.i508
s_mov_b32 m0, -1
ds_read_b32 v35, v10 offset:64
ds_read_b32 v36, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v35, v35, v36
ds_write_b32 v10, v35
ds_read_b32 v27, v27 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v35, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v35
ds_write_b32 v10, v27 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v27, v31 offset:64
ds_read_b32 v31, v10 offset:512
v_mov_b32_e32 v35, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v31
ds_write_b32 v10, v27 offset:512
s_waitcnt lgkmcnt(0)
BB6_192: ; %Flow1206
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v35
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_197
s_cbranch_execz BB6_197
BB6_193:
v_or_b32_e32 v27, 8, v9
v_add_i32_e32 v27, vcc, v27, v2
v_mul_lo_i32 v31, v27, 3
v_mov_b32_e32 v27, 0xe0
v_mad_i32_i24 v27, v27, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v35, v27
ds_read_b32 v27, v27 offset:32
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_mov_b64 s[8:9], s[28:29]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v35, v27
v_add_i32_e32 v35, vcc, v1, v31
v_ashrrev_i32_e32 v36, 31, v35
v_lshl_b64 v[37:38], v[35:36], 2
v_add_i32_e32 v35, vcc, s28, v37
v_mov_b32_e32 v31, s29
v_addc_u32_e32 v36, vcc, v38, v31, vcc
buffer_load_dword v38, v[37:38], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt vmcnt(0)
BB6_194: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v37, v27, v38
v_mov_b32_e32 v40, v38
v_mov_b32_e32 v39, v37
buffer_atomic_cmpswap v[39:40], v[35:36], s[8:11], 0 addr64 glc
v_mov_b32_e32 v31, -1
v_mov_b32_e32 v31, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v39, v38
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v38, v39
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_194
; BB#195: ; %atomicAdd_g_f.exit.i496
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v31
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_197
; BB#196:
v_add_f32_e32 v3, v27, v3
BB6_197: ; %Flow1207
s_or_b64 exec, exec, s[6:7]
BB6_198: ; %reduce_force_i_pow2.exit510
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v32
ds_write_b32 v7, v33
ds_write_b32 v12, v34
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_209
s_cbranch_execz BB6_209
BB6_199:
s_mov_b32 m0, -1
ds_read_b32 v27, v10 offset:128
ds_read_b32 v31, v10
v_add_i32_e32 v32, vcc, v11, v23
v_lshlrev_b32_e32 v32, 2, v32
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v31
ds_write_b32 v10, v27
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v27, vcc, s15, v32
ds_read_b32 v31, v27 offset:128
ds_read_b32 v32, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v31, v31, v32
ds_write_b32 v10, v31 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v31, vcc, v11, v19
v_lshlrev_b32_e32 v31, 2, v31
v_add_i32_e32 v31, vcc, s15, v31
ds_read_b32 v32, v31 offset:128
ds_read_b32 v33, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v10, v32 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v32, 0
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_201
BB6_200:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v32, 0, -1, vcc
BB6_201: ; %Flow1202
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_203
s_cbranch_execz BB6_203
BB6_202: ; %.thread85.i459
s_mov_b32 m0, -1
ds_read_b32 v32, v10 offset:64
ds_read_b32 v33, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v32, v32, v33
ds_write_b32 v10, v32
ds_read_b32 v27, v27 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v32, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v32
ds_write_b32 v10, v27 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v27, v31 offset:64
ds_read_b32 v31, v10 offset:512
v_mov_b32_e32 v32, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v31
ds_write_b32 v10, v27 offset:512
s_waitcnt lgkmcnt(0)
BB6_203: ; %Flow1203
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v32
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_208
s_cbranch_execz BB6_208
BB6_204:
v_or_b32_e32 v27, 16, v9
v_add_i32_e32 v27, vcc, v27, v2
v_mul_lo_i32 v31, v27, 3
v_mov_b32_e32 v27, 0xe0
v_mad_i32_i24 v27, v27, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v32, v27
ds_read_b32 v27, v27 offset:32
v_add_i32_e32 v31, vcc, v1, v31
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_mov_b64 s[8:9], s[28:29]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v32, v27
v_ashrrev_i32_e32 v32, 31, v31
v_lshl_b64 v[33:34], v[31:32], 2
v_add_i32_e32 v31, vcc, s28, v33
v_mov_b32_e32 v32, s29
v_addc_u32_e32 v32, vcc, v34, v32, vcc
buffer_load_dword v34, v[33:34], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt vmcnt(0)
BB6_205: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v33, -1
v_add_f32_e32 v33, v27, v34
v_mov_b32_e32 v36, v34
v_mov_b32_e32 v35, v33
buffer_atomic_cmpswap v[35:36], v[31:32], s[8:11], 0 addr64 glc
v_mov_b32_e32 v33, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v35, v34
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v34, v35
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_205
; BB#206: ; %atomicAdd_g_f.exit.i447
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v31, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v31
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_208
; BB#207:
v_add_f32_e32 v3, v27, v3
BB6_208: ; %Flow1204
s_or_b64 exec, exec, s[6:7]
BB6_209: ; %reduce_force_i_pow2.exit461
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v28
ds_write_b32 v7, v29
ds_write_b32 v12, v30
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_220
s_cbranch_execz BB6_220
BB6_210:
s_mov_b32 m0, -1
ds_read_b32 v27, v10 offset:128
ds_read_b32 v28, v10
v_add_i32_e32 v29, vcc, v11, v23
v_lshlrev_b32_e32 v29, 2, v29
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v28
ds_write_b32 v10, v27
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v27, vcc, s15, v29
ds_read_b32 v28, v27 offset:128
ds_read_b32 v29, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v28, v28, v29
ds_write_b32 v10, v28 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v28, vcc, v11, v19
v_lshlrev_b32_e32 v28, 2, v28
v_add_i32_e32 v28, vcc, s15, v28
ds_read_b32 v29, v28 offset:128
ds_read_b32 v30, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v29, v29, v30
ds_write_b32 v10, v29 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v29, 0
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_212
BB6_211:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v29, 0, -1, vcc
BB6_212: ; %Flow1199
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_214
s_cbranch_execz BB6_214
BB6_213: ; %.thread85.i410
s_mov_b32 m0, -1
ds_read_b32 v29, v10 offset:64
ds_read_b32 v30, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v29, v29, v30
ds_write_b32 v10, v29
ds_read_b32 v27, v27 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v29, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v29
ds_write_b32 v10, v27 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v27, v28 offset:64
ds_read_b32 v28, v10 offset:512
v_mov_b32_e32 v29, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v27, v28
ds_write_b32 v10, v27 offset:512
s_waitcnt lgkmcnt(0)
BB6_214: ; %Flow1200
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v29
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_219
s_cbranch_execz BB6_219
BB6_215:
v_or_b32_e32 v27, 24, v9
v_add_i32_e32 v27, vcc, v27, v2
v_mul_lo_i32 v28, v27, 3
v_mov_b32_e32 v27, 0xe0
v_mad_i32_i24 v27, v27, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v29, v27
ds_read_b32 v27, v27 offset:32
v_add_i32_e32 v28, vcc, v1, v28
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_mov_b64 s[8:9], s[28:29]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v27, v29, v27
v_ashrrev_i32_e32 v29, 31, v28
v_lshl_b64 v[30:31], v[28:29], 2
v_add_i32_e32 v28, vcc, s28, v30
v_mov_b32_e32 v29, s29
v_addc_u32_e32 v29, vcc, v31, v29, vcc
buffer_load_dword v31, v[30:31], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt vmcnt(0)
BB6_216: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v30, -1
v_add_f32_e32 v30, v27, v31
v_mov_b32_e32 v33, v31
v_mov_b32_e32 v32, v30
buffer_atomic_cmpswap v[32:33], v[28:29], s[8:11], 0 addr64 glc
v_mov_b32_e32 v30, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v32, v31
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v31, v32
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_216
; BB#217: ; %atomicAdd_g_f.exit.i398
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v28, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v28
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_219
; BB#218:
v_add_f32_e32 v3, v27, v3
BB6_219: ; %Flow1201
s_or_b64 exec, exec, s[6:7]
BB6_220: ; %reduce_force_i_pow2.exit412
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v24
ds_write_b32 v7, v25
ds_write_b32 v12, v26
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_231
s_cbranch_execz BB6_231
BB6_221:
s_mov_b32 m0, -1
ds_read_b32 v24, v10 offset:128
ds_read_b32 v25, v10
v_add_i32_e32 v26, vcc, v11, v23
v_lshlrev_b32_e32 v26, 2, v26
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v24, v24, v25
ds_write_b32 v10, v24
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v24, vcc, s15, v26
ds_read_b32 v25, v24 offset:128
ds_read_b32 v26, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v25, v25, v26
ds_write_b32 v10, v25 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v25, vcc, v11, v19
v_lshlrev_b32_e32 v25, 2, v25
v_add_i32_e32 v25, vcc, s15, v25
ds_read_b32 v26, v25 offset:128
ds_read_b32 v27, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v26, v26, v27
ds_write_b32 v10, v26 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v26, 0
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_223
BB6_222:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v26, 0, -1, vcc
BB6_223: ; %Flow1196
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_225
s_cbranch_execz BB6_225
BB6_224: ; %.thread85.i361
s_mov_b32 m0, -1
ds_read_b32 v26, v10 offset:64
ds_read_b32 v27, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v26, v26, v27
ds_write_b32 v10, v26
ds_read_b32 v24, v24 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v26, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v24, v24, v26
ds_write_b32 v10, v24 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v24, v25 offset:64
ds_read_b32 v25, v10 offset:512
v_mov_b32_e32 v26, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v24, v24, v25
ds_write_b32 v10, v24 offset:512
s_waitcnt lgkmcnt(0)
BB6_225: ; %Flow1197
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v26
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_230
s_cbranch_execz BB6_230
BB6_226:
v_or_b32_e32 v24, 32, v9
v_add_i32_e32 v24, vcc, v24, v2
v_mul_lo_i32 v25, v24, 3
v_mov_b32_e32 v24, 0xe0
v_mad_i32_i24 v24, v24, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v26, v24
ds_read_b32 v24, v24 offset:32
v_add_i32_e32 v25, vcc, v1, v25
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_mov_b64 s[8:9], s[28:29]
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v24, v26, v24
v_ashrrev_i32_e32 v26, 31, v25
v_lshl_b64 v[27:28], v[25:26], 2
v_add_i32_e32 v25, vcc, s28, v27
v_mov_b32_e32 v26, s29
v_addc_u32_e32 v26, vcc, v28, v26, vcc
buffer_load_dword v28, v[27:28], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt vmcnt(0)
BB6_227: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v27, -1
v_add_f32_e32 v27, v24, v28
v_mov_b32_e32 v30, v28
v_mov_b32_e32 v29, v27
buffer_atomic_cmpswap v[29:30], v[25:26], s[8:11], 0 addr64 glc
v_mov_b32_e32 v27, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v29, v28
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v28, v29
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_227
; BB#228: ; %atomicAdd_g_f.exit.i349
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v25, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v25
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_230
; BB#229:
v_add_f32_e32 v3, v24, v3
BB6_230: ; %Flow1198
s_or_b64 exec, exec, s[6:7]
BB6_231: ; %reduce_force_i_pow2.exit363
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v20
ds_write_b32 v7, v21
ds_write_b32 v12, v22
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_242
s_cbranch_execz BB6_242
BB6_232:
s_mov_b32 m0, -1
ds_read_b32 v20, v10 offset:128
ds_read_b32 v21, v10
v_add_i32_e32 v22, vcc, v11, v23
v_lshlrev_b32_e32 v22, 2, v22
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v20, v20, v21
ds_write_b32 v10, v20
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v20, vcc, s15, v22
ds_read_b32 v21, v20 offset:128
ds_read_b32 v22, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v21, v21, v22
ds_write_b32 v10, v21 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v21, vcc, v11, v19
v_lshlrev_b32_e32 v21, 2, v21
v_add_i32_e32 v21, vcc, s15, v21
ds_read_b32 v22, v21 offset:128
ds_read_b32 v24, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v22, v22, v24
ds_write_b32 v10, v22 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v22, 0
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_234
BB6_233:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v22, 0, -1, vcc
BB6_234: ; %Flow1193
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_236
s_cbranch_execz BB6_236
BB6_235: ; %.thread85.i312
s_mov_b32 m0, -1
ds_read_b32 v22, v10 offset:64
ds_read_b32 v24, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v22, v22, v24
ds_write_b32 v10, v22
ds_read_b32 v20, v20 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v22, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v20, v20, v22
ds_write_b32 v10, v20 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v20, v21 offset:64
ds_read_b32 v21, v10 offset:512
v_mov_b32_e32 v22, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v20, v20, v21
ds_write_b32 v10, v20 offset:512
s_waitcnt lgkmcnt(0)
BB6_236: ; %Flow1194
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v22
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_241
s_cbranch_execz BB6_241
BB6_237:
v_or_b32_e32 v20, 40, v9
v_add_i32_e32 v20, vcc, v20, v2
v_mul_lo_i32 v21, v20, 3
v_mov_b32_e32 v20, 0xe0
v_mad_i32_i24 v20, v20, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v22, v20
ds_read_b32 v20, v20 offset:32
v_add_i32_e32 v21, vcc, v1, v21
v_mov_b32_e32 v25, s29
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v20, v22, v20
v_ashrrev_i32_e32 v22, 31, v21
v_lshl_b64 v[21:22], v[21:22], 2
v_add_i32_e32 v24, vcc, s28, v21
s_mov_b64 s[8:9], s[28:29]
v_addc_u32_e32 v25, vcc, v22, v25, vcc
buffer_load_dword v22, v[21:22], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt vmcnt(0)
BB6_238: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v21, -1
v_add_f32_e32 v21, v20, v22
v_mov_b32_e32 v27, v22
v_mov_b32_e32 v26, v21
buffer_atomic_cmpswap v[26:27], v[24:25], s[8:11], 0 addr64 glc
v_mov_b32_e32 v21, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v26, v22
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v22, v26
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_238
; BB#239: ; %atomicAdd_g_f.exit.i300
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v21, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v21
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_241
; BB#240:
v_add_f32_e32 v3, v20, v3
BB6_241: ; %Flow1195
s_or_b64 exec, exec, s[6:7]
BB6_242: ; %reduce_force_i_pow2.exit314
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v16
ds_write_b32 v7, v17
ds_write_b32 v12, v18
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_253
s_cbranch_execz BB6_253
BB6_243:
s_mov_b32 m0, -1
ds_read_b32 v16, v10 offset:128
ds_read_b32 v17, v10
v_add_i32_e32 v18, vcc, v11, v23
v_lshlrev_b32_e32 v18, 2, v18
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v16, v16, v17
ds_write_b32 v10, v16
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v16, vcc, s15, v18
ds_read_b32 v17, v16 offset:128
ds_read_b32 v18, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v17, v17, v18
ds_write_b32 v10, v17 offset:256
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v17, vcc, v11, v19
v_lshlrev_b32_e32 v17, 2, v17
v_add_i32_e32 v17, vcc, s15, v17
ds_read_b32 v18, v17 offset:128
ds_read_b32 v20, v10 offset:512
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v20
ds_write_b32 v10, v18 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v18, 0
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_245
BB6_244:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v18, 0, -1, vcc
BB6_245: ; %Flow1190
s_or_saveexec_b64 s[6:7], s[6:7]
s_xor_b64 exec, exec, s[6:7]
; mask branch BB6_247
s_cbranch_execz BB6_247
BB6_246: ; %.thread85.i263
s_mov_b32 m0, -1
ds_read_b32 v18, v10 offset:64
ds_read_b32 v20, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v18, v18, v20
ds_write_b32 v10, v18
ds_read_b32 v16, v16 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v18, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v16, v16, v18
ds_write_b32 v10, v16 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v16, v17 offset:64
ds_read_b32 v17, v10 offset:512
v_mov_b32_e32 v18, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v16, v16, v17
ds_write_b32 v10, v16 offset:512
s_waitcnt lgkmcnt(0)
BB6_247: ; %Flow1191
s_or_b64 exec, exec, s[6:7]
v_cmp_ne_u32_e32 vcc, 0, v18
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
; mask branch BB6_252
s_cbranch_execz BB6_252
BB6_248:
v_or_b32_e32 v16, 48, v9
v_add_i32_e32 v16, vcc, v16, v2
v_mul_lo_i32 v17, v16, 3
v_mov_b32_e32 v16, 0xe0
v_mad_i32_i24 v16, v16, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v18, v16
ds_read_b32 v16, v16 offset:32
v_add_i32_e32 v17, vcc, v1, v17
v_mov_b32_e32 v20, s29
s_mov_b32 s11, 0xf000
s_mov_b32 s10, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v16, v18, v16
v_ashrrev_i32_e32 v18, 31, v17
v_lshl_b64 v[17:18], v[17:18], 2
v_add_i32_e32 v24, vcc, s28, v17
s_mov_b64 s[8:9], s[28:29]
v_addc_u32_e32 v25, vcc, v18, v20, vcc
buffer_load_dword v18, v[17:18], s[8:11], 0 addr64
s_mov_b64 s[8:9], 0
s_mov_b64 s[12:13], s[8:9]
s_waitcnt vmcnt(0)
BB6_249: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v17, -1
v_add_f32_e32 v17, v16, v18
v_mov_b32_e32 v21, v18
v_mov_b32_e32 v20, v17
buffer_atomic_cmpswap v[20:21], v[24:25], s[8:11], 0 addr64 glc
v_mov_b32_e32 v17, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v20, v18
s_or_b64 s[12:13], vcc, s[12:13]
v_mov_b32_e32 v18, v20
s_andn2_b64 exec, exec, s[12:13]
s_cbranch_execnz BB6_249
; BB#250: ; %atomicAdd_g_f.exit.i251
s_or_b64 exec, exec, s[12:13]
s_and_b64 s[8:9], exec, s[2:3]
v_cndmask_b32_e64 v17, 0, 1, s[8:9]
v_cmp_ne_u32_e32 vcc, 1, v17
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_252
; BB#251:
v_add_f32_e32 v3, v16, v3
BB6_252: ; %Flow1192
s_or_b64 exec, exec, s[6:7]
BB6_253: ; %reduce_force_i_pow2.exit265
s_or_b64 exec, exec, s[4:5]
s_mov_b32 m0, -1
s_barrier
ds_write_b32 v6, v13
ds_write_b32 v7, v14
ds_write_b32 v12, v15
s_waitcnt lgkmcnt(0)
s_barrier
s_and_saveexec_b64 s[4:5], s[0:1]
s_xor_b64 s[0:1], exec, s[4:5]
; mask branch BB6_264
s_cbranch_execz BB6_264
BB6_254:
s_mov_b32 m0, -1
ds_read_b32 v12, v10 offset:128
ds_read_b32 v13, v10
v_add_i32_e32 v14, vcc, v11, v23
v_lshlrev_b32_e32 v14, 2, v14
v_add_i32_e32 v11, vcc, v11, v19
v_lshlrev_b32_e32 v11, 2, v11
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v12, v12, v13
ds_write_b32 v10, v12
s_waitcnt lgkmcnt(0)
v_add_i32_e32 v12, vcc, s15, v14
ds_read_b32 v13, v12 offset:128
ds_read_b32 v14, v10 offset:256
v_add_i32_e32 v11, vcc, s15, v11
v_cmp_lt_i32_e32 vcc, 1, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v14
ds_write_b32 v10, v13 offset:256
s_waitcnt lgkmcnt(0)
ds_read_b32 v13, v11 offset:128
ds_read_b32 v14, v10 offset:512
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v14
ds_write_b32 v10, v13 offset:512
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v13, 0
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_256
BB6_255:
v_cmp_eq_u32_e32 vcc, 2, v1
v_cndmask_b32_e64 v13, 0, -1, vcc
BB6_256: ; %Flow1187
s_or_saveexec_b64 s[4:5], s[4:5]
s_xor_b64 exec, exec, s[4:5]
; mask branch BB6_258
s_cbranch_execz BB6_258
BB6_257: ; %.thread85.i214
s_mov_b32 m0, -1
ds_read_b32 v13, v10 offset:64
ds_read_b32 v14, v10
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v13, v13, v14
ds_write_b32 v10, v13
ds_read_b32 v12, v12 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v13, v10 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v12, v12, v13
ds_write_b32 v10, v12 offset:256
ds_read_b32 v11, v11 offset:64
s_waitcnt lgkmcnt(0)
ds_read_b32 v12, v10 offset:512
v_mov_b32_e32 v13, -1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v11, v11, v12
ds_write_b32 v10, v11 offset:512
s_waitcnt lgkmcnt(0)
BB6_258: ; %Flow1188
s_or_b64 exec, exec, s[4:5]
v_cmp_ne_u32_e32 vcc, 0, v13
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; mask branch BB6_263
s_cbranch_execz BB6_263
BB6_259:
v_or_b32_e32 v9, 56, v9
v_add_i32_e32 v2, vcc, v9, v2
v_mul_lo_i32 v9, v2, 3
v_mov_b32_e32 v2, 0xe0
v_mad_i32_i24 v2, v2, v1, v10
s_mov_b32 m0, -1
ds_read_b32 v10, v2
ds_read_b32 v2, v2 offset:32
v_add_i32_e32 v9, vcc, v1, v9
s_mov_b32 s31, 0xf000
s_mov_b32 s30, 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v2, v10, v2
v_ashrrev_i32_e32 v10, 31, v9
v_lshl_b64 v[11:12], v[9:10], 2
v_add_i32_e32 v9, vcc, s28, v11
v_mov_b32_e32 v10, s29
v_addc_u32_e32 v10, vcc, v12, v10, vcc
buffer_load_dword v12, v[11:12], s[28:31], 0 addr64
s_mov_b64 s[28:29], 0
s_mov_b64 s[6:7], s[28:29]
s_waitcnt vmcnt(0)
BB6_260: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v11, -1
v_add_f32_e32 v11, v2, v12
v_mov_b32_e32 v14, v12
v_mov_b32_e32 v13, v11
buffer_atomic_cmpswap v[13:14], v[9:10], s[28:31], 0 addr64 glc
v_mov_b32_e32 v11, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v13, v12
s_or_b64 s[6:7], vcc, s[6:7]
v_mov_b32_e32 v12, v13
s_andn2_b64 exec, exec, s[6:7]
s_cbranch_execnz BB6_260
; BB#261: ; %atomicAdd_g_f.exit.i202
s_or_b64 exec, exec, s[6:7]
s_and_b64 s[6:7], exec, s[2:3]
v_cndmask_b32_e64 v9, 0, 1, s[6:7]
v_cmp_ne_u32_e32 vcc, 1, v9
s_and_b64 vcc, exec, vcc
s_cbranch_vccnz BB6_263
; BB#262:
v_add_f32_e32 v3, v2, v3
BB6_263: ; %Flow1189
s_or_b64 exec, exec, s[4:5]
BB6_264: ; %reduce_force_i_pow2.exit216
s_or_b64 exec, exec, s[0:1]
s_barrier
v_cmp_gt_u32_e32 vcc, 3, v1
s_and_b64 s[0:1], exec, s[2:3]
s_and_b64 s[0:1], vcc, s[0:1]
s_and_saveexec_b64 s[2:3], s[0:1]
s_xor_b64 s[0:1], exec, s[2:3]
; mask branch BB6_268
s_cbranch_execz BB6_268
BB6_265:
v_add_i32_e32 v1, vcc, v4, v1
v_mov_b32_e32 v2, 0
v_lshl_b64 v[1:2], v[1:2], 2
v_add_i32_e32 v9, vcc, s24, v1
v_mov_b32_e32 v4, s25
s_mov_b32 s27, 0xf000
s_mov_b32 s26, 0
v_addc_u32_e32 v10, vcc, v2, v4, vcc
buffer_load_dword v2, v[1:2], s[24:27], 0 addr64
s_mov_b64 s[24:25], 0
s_mov_b64 s[2:3], s[24:25]
s_waitcnt vmcnt(0)
BB6_266: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v1, -1
v_add_f32_e32 v1, v3, v2
v_mov_b32_e32 v12, v2
v_mov_b32_e32 v11, v1
buffer_atomic_cmpswap v[11:12], v[9:10], s[24:27], 0 addr64 glc
v_mov_b32_e32 v1, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v11, v2
s_or_b64 s[2:3], vcc, s[2:3]
v_mov_b32_e32 v2, v11
s_andn2_b64 exec, exec, s[2:3]
s_cbranch_execnz BB6_266
; BB#267: ; %Flow1185
s_or_b64 exec, exec, s[2:3]
BB6_268: ; %Flow1186
s_or_b64 exec, exec, s[0:1]
v_and_b32_e32 v1, 0x7ffffdf, v0
s_mov_b32 m0, -1
v_cmp_gt_u32_e32 vcc, 16, v1
ds_write_b32 v6, v8
ds_write_b32 v7, v5
s_and_saveexec_b64 s[0:1], vcc
s_xor_b64 s[0:1], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; mask branch BB6_282
s_cbranch_execz BB6_282
BB6_269:
v_and_b32_e32 v0, 32, v0
v_lshlrev_b32_e32 v0, 2, v0
v_add_i32_e32 v0, vcc, s14, v0
v_add_i32_e32 v0, vcc, 0x620, v0
v_lshlrev_b32_e32 v2, 2, v1
v_add_i32_e32 v2, vcc, v2, v0
s_mov_b32 m0, -1
ds_read_b32 v3, v2 offset:64
ds_read_b32 v4, v2
v_cmp_gt_u32_e32 vcc, 8, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3
s_waitcnt lgkmcnt(0)
ds_read_b32 v3, v2 offset:320
ds_read_b32 v4, v2 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3 offset:256
s_and_saveexec_b64 s[2:3], vcc
s_xor_b64 s[2:3], exec, s[2:3]
s_waitcnt lgkmcnt(0)
; mask branch BB6_281
s_cbranch_execz BB6_281
BB6_270:
s_mov_b32 m0, -1
ds_read_b32 v3, v2 offset:32
ds_read_b32 v4, v2
v_cmp_gt_u32_e32 vcc, 4, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3
s_waitcnt lgkmcnt(0)
ds_read_b32 v3, v2 offset:288
ds_read_b32 v4, v2 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3 offset:256
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
s_waitcnt lgkmcnt(0)
; mask branch BB6_280
s_cbranch_execz BB6_280
BB6_271:
s_mov_b32 m0, -1
ds_read_b32 v3, v2 offset:16
ds_read_b32 v4, v2
v_cmp_gt_u32_e32 vcc, 2, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3
s_waitcnt lgkmcnt(0)
ds_read_b32 v3, v2 offset:272
ds_read_b32 v4, v2 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3 offset:256
s_and_saveexec_b64 s[6:7], vcc
s_xor_b64 s[6:7], exec, s[6:7]
s_waitcnt lgkmcnt(0)
; mask branch BB6_279
s_cbranch_execz BB6_279
BB6_272:
s_mov_b32 m0, -1
ds_read_b32 v3, v2 offset:8
ds_read_b32 v4, v2
v_cmp_eq_u32_e32 vcc, 0, v1
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3
s_waitcnt lgkmcnt(0)
ds_read_b32 v3, v2 offset:264
ds_read_b32 v4, v2 offset:256
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v3, v3, v4
ds_write_b32 v2, v3 offset:256
s_and_saveexec_b64 s[8:9], vcc
s_xor_b64 s[8:9], exec, s[8:9]
s_waitcnt lgkmcnt(0)
; mask branch BB6_278
s_cbranch_execz BB6_278
BB6_273:
s_mov_b32 m0, -1
s_mov_b32 s23, 0xf000
s_mov_b32 s22, -1
ds_read_b32 v2, v0
ds_read_b32 v3, v0 offset:4
ds_read_b32 v1, v0 offset:256
ds_read_b32 v0, v0 offset:260
buffer_load_dword v4, off, s[20:23], 0
s_mov_b64 s[10:11], 0
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v2, v2, v3
s_waitcnt vmcnt(0)
BB6_274: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v3, v4, v2
v_mov_b32_e32 v6, v4
v_mov_b32_e32 v5, v3
buffer_atomic_cmpswap v[5:6], off, s[20:23], 0 glc
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v5, v4
s_or_b64 s[10:11], vcc, s[10:11]
v_mov_b32_e32 v4, v5
s_andn2_b64 exec, exec, s[10:11]
s_cbranch_execnz BB6_274
; BB#275: ; %atomicAdd_g_f.exit.i161
s_or_b64 exec, exec, s[10:11]
s_mov_b32 s19, 0xf000
s_mov_b32 s18, -1
buffer_load_dword v2, off, s[16:19], 0
v_add_f32_e32 v0, v1, v0
s_mov_b64 s[10:11], 0
s_waitcnt vmcnt(0)
BB6_276: ; =>This Inner Loop Header: Depth=1
v_add_f32_e32 v1, v2, v0
v_mov_b32_e32 v4, v2
v_mov_b32_e32 v3, v1
buffer_atomic_cmpswap v[3:4], off, s[16:19], 0 glc
s_waitcnt vmcnt(0) expcnt(0)
v_cmp_eq_u32_e32 vcc, v3, v2
s_or_b64 s[10:11], vcc, s[10:11]
v_mov_b32_e32 v2, v3
s_andn2_b64 exec, exec, s[10:11]
s_cbranch_execnz BB6_276
; BB#277: ; %Flow
s_or_b64 exec, exec, s[10:11]
BB6_278: ; %Flow1181
s_or_b64 exec, exec, s[8:9]
BB6_279: ; %Flow1182
s_or_b64 exec, exec, s[6:7]
BB6_280: ; %Flow1183
s_or_b64 exec, exec, s[4:5]
BB6_281: ; %Flow1184
s_or_b64 exec, exec, s[2:3]
BB6_282: ; %reduce_energy_pow2.exit
s_or_b64 exec, exec, s[0:1]
s_endpgm
.Lfunc_end6:
.size nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl, .Lfunc_end6-nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl
.section .AMDGPU.csdata
; Compiler-emitted resource summary, written purely as comments.
; It directly follows the .size directive of
; nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_opencl, so it presumably describes
; that kernel -- TODO confirm.
; NOTE(review): the *Blocks values are consistent with granulated register
; counts: 104 VGPRs / 4 - 1 = 25 and ceil(58 SGPRs / 8) - 1 = 7. Confirm the
; granule sizes against the LLVM AMDGPU documentation for this target.
; NOTE(review): LDSByteSize is reported as 0 although the code above issues
; ds_read/ds_write instructions; "compile time only" presumably means local
; memory is sized at dispatch rather than statically -- verify.
; Kernel info:
; codeLenInByte = 62464
; NumSgprs: 58
; NumVgprs: 104
; FloatMode: 192
; IeeeMode: 1
; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 7
; VGPRBlocks: 25
; NumSGPRsForWavesPerEU: 58
; NumVGPRsForWavesPerEU: 104
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 1
.section .AMDGPU.config
; Ten 32-bit words that read as five (key, value) pairs.
; NOTE(review): the keys look like the register offsets used by LLVM's AMDGPU
; backend (0xB848 COMPUTE_PGM_RSRC1, 0xB84C COMPUTE_PGM_RSRC2,
; 0xB860 COMPUTE_TMPRING_SIZE, 0x4 / 0x8 spilled SGPR / VGPR counts) --
; confirm against the backend's register definitions before relying on this.
; NOTE(review): the value for key 0xB848 encodes a VGPR field of 26, which
; matches granulated_workitem_vgpr_count = 26 of the kernel that FOLLOWS this
; section, not "VGPRBlocks: 25" in the kernel-info comments that precede it.
; This section therefore presumably belongs to
; nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_opencl -- TODO confirm.
; key 47176 = 0xB848
.long 47176
; value 11272666 = 0x00AC01DA; if this is RSRC1: bits 0..5 = 26 (VGPR granules),
; bits 6..9 = 7 (SGPR granules), bits 12..19 = 0xC0 = 192 (float mode),
; bit 21 = 1 (DX10 clamp), bit 23 = 1 (IEEE mode)
.long 11272666
; key 47180 = 0xB84C
.long 47180
; value 2192 = 0x890; if this is RSRC2: bits 1..5 = 8 (user SGPRs),
; bit 7 = 1 (TGID_X_EN), bits 11..12 = 1 (TIDIG_COMP_CNT)
.long 2192
; key 47200 = 0xB860; value 0
.long 47200
.long 0
; key 4; value 0 (presumably: no spilled SGPRs)
.long 4
.long 0
; key 8; value 0 (presumably: no spilled VGPRs)
.long 8
.long 0
.text
.globl nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_opencl
.p2align 8
.type nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_opencl,@function
.amdgpu_hsa_kernel nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_opencl
nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_opencl: ; @nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_opencl
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 0
amd_machine_kind = 1
amd_machine_version_major = 7
amd_machine_version_minor = 0
amd_machine_version_stepping = 1
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
granulated_workitem_vgpr_count = 26
granulated_wavefront_sgpr_count = 7
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
user_sgpr_count = 8
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
enable_vgpr_workitem_id = 1
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 0
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 232
workgroup_fbarrier_count = 0
wavefront_sgpr_count = 60
workitem_vgpr_count = 107
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
s_load_dwordx2 s[20:21], s[6:7], 0x2c
s_mov_b32 s9, 0
s_lshl_b64 s[0:1], s[8:9], 4
v_mov_b32_e32 v4, s1
v_mov_b32_e32 v3, s0
s_mov_b32 s23, 0xf000
s_mov_b32 s22, s9
s_waitcnt lgkmcnt(0)
buffer_load_dwordx4 v[36:39], v[3:4], s[20:23], 0 addr64
v_mov_b32_e32 v2, v0
s_load_dwordx2 s[0:1], s[6:7], 0x24
s_load_dwordx2 s[32:33], s[6:7], 0x18
s_mov_b64 s[34:35], s[22:23]
s_mov_b64 s[2:3], s[22:23]
s_load_dword s14, s[6:7], 0x33
s_load_dwordx2 s[36:37], s[6:7], 0x22
s_mov_b32 m0, -1
s_mov_b64 s[38:39], s[22:23]
s_load_dword s18, s[6:7], 0x5
s_waitcnt vmcnt(0)
v_lshlrev_b32_e32 v41, 3, v36
v_mul_lo_i32 v4, v37, 3
v_add_i32_e32 v0, vcc, v1, v41
v_lshlrev_b32_e32 v0, 3, v0
v_add_i32_e32 v9, vcc, v2, v0
v_ashrrev_i32_e32 v10, 31, v9
v_ashrrev_i32_e32 v5, 31, v4
v_lshl_b64 v[11:12], v[4:5], 2
v_lshl_b64 v[6:7], v[9:10], 4
s_waitcnt lgkmcnt(0)
buffer_load_dwordx4 v[5:8], v[6:7], s[32:35], 0 addr64
buffer_load_dwordx2 v[13:14], v[11:12], s[0:3], 0 addr64
buffer_load_dword v0, v[11:12], s[0:3], 0 addr64 offset:8
s_load_dword s2, s[6:7], 0x2
v_lshlrev_b32_e32 v11, 3, v1
v_add_i32_e32 v40, vcc, v2, v11
s_load_dword s0, s[4:5], 0x1
s_add_i32 s4, s14, 0x420
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
s_waitcnt vmcnt(1)
v_add_f32_e32 v15, v6, v14
v_add_f32_e32 v14, v5, v13
s_waitcnt vmcnt(0)
v_add_f32_e32 v5, v7, v0
v_lshlrev_b32_e32 v0, 4, v40
v_add_i32_e32 v3, vcc, s14, v0
v_mul_f32_e32 v6, s2, v8
ds_write2_b64 v3, v[14:15], v[5:6] offset1:1
s_waitcnt lgkmcnt(0)
v_lshl_b64 v[5:6], v[9:10], 3
buffer_load_dwordx2 v[5:6], v[5:6], s[36:39], 0 addr64
v_mad_u32_u24 v0, s0, v1, v2
v_lshlrev_b32_e32 v7, 3, v40
v_add_i32_e32 v7, vcc, s4, v7
v_or_b32_e32 v3, 32, v0
v_lshrrev_b32_e32 v46, 5, v0
v_cmp_eq_u32_e32 vcc, 32, v3
s_waitcnt vmcnt(0)
ds_write_b64 v7, v[5:6]
s_and_saveexec_b64 s[0:1], vcc
s_xor_b64 s[0:1], exec, s[0:1]
s_waitcnt lgkmcnt(0)
; mask branch BB7_2
BB7_1:
v_lshlrev_b32_e32 v3, 2, v46
v_add_i32_e32 v3, vcc, s14, v3
v_mov_b32_e32 v5, 0
s_mov_b32 m0, -1
ds_write_b32 v3, v5 offset:2336
s_waitcnt lgkmcnt(0)
BB7_2:
s_or_b64 exec, exec, s[0:1]
s_barrier
s_load_dwordx2 s[40:41], s[6:7], 0x2e
v_cmp_ne_u32_e32 vcc, 22, v37
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v3, 0
v_cmp_eq_u32_e64 s[0:1], 22, v37
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB7_5
; BB#3:
v_ashrrev_i32_e32 v6, 31, v38
v_mov_b32_e32 v5, v38
s_mov_b32 s43, 0xf000
s_mov_b32 s42, 0
v_lshl_b64 v[5:6], v[5:6], 5
buffer_load_dword v5, v[5:6], s[40:43], 0 addr64
s_waitcnt vmcnt(0)
v_cmp_ne_u32_e32 vcc, v5, v41
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v5, v3
s_cbranch_vccnz BB7_6
; BB#4: ; %.preheader561.preheader
v_lshlrev_b32_e32 v5, 4, v2
v_add_i32_e32 v9, vcc, s14, v5
s_mov_b32 m0, -1
ds_read2_b64 v[5:8], v9 offset0:1 offset1:17
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v5, 0x41000000
ds_read2_b64 v[12:15], v9 offset0:33 offset1:49
v_mul_f32_e32 v5, s2, v5
ds_read2_b64 v[16:19], v9 offset0:65 offset1:81
v_mul_f32_e32 v10, v6, v6
v_mov_b32_e32 v6, 0x6f800000
v_cmp_lt_f32_e64 vcc, v6, |v5|
v_mov_b32_e32 v6, 0x2f800000
s_waitcnt lgkmcnt(0)
v_cndmask_b32_e32 v12, 1.0, v6, vcc
v_mac_f32_e32 v10, v8, v8
v_mul_f32_e32 v5, v12, v5
v_mac_f32_e32 v10, v13, v13
v_rcp_f32_e32 v13, v5
ds_read2_b64 v[5:8], v9 offset0:97 offset1:113
v_mac_f32_e32 v10, v15, v15
v_mac_f32_e32 v10, v17, v17
v_mac_f32_e32 v10, v19, v19
s_waitcnt lgkmcnt(0)
v_mac_f32_e32 v10, v6, v6
v_mac_f32_e32 v10, v8, v8
v_mul_f32_e32 v5, v13, v10
v_mov_b32_e32 v6, 0xbf106ebb
v_mul_f32_e32 v5, v5, v12
v_mul_f32_e32 v6, s18, v6
v_mul_f32_e32 v5, v5, v6
s_branch BB7_6
BB7_5:
v_mov_b32_e32 v5, v3
BB7_6: ; %.preheader560
s_load_dwordx2 s[28:29], s[6:7], 0x1a
v_cmp_lt_i32_e32 vcc, v38, v39
v_mov_b32_e32 v17, -1
s_and_b64 vcc, exec, vcc
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB7_8
; BB#7: ; %.preheader560.._crit_edge_crit_edge
v_mov_b32_e32 v8, 0
v_lshlrev_b32_e32 v6, 2, v0
v_mov_b32_e32 v9, v8
v_mov_b32_e32 v10, v8
v_add_i32_e32 v7, vcc, s14, v6
v_mov_b32_e32 v51, v11
v_mov_b32_e32 v16, v11
v_add_i32_e32 v6, vcc, 0x620, v7
v_add_i32_e32 v12, vcc, 0x820, v7
v_add_i32_e32 v7, vcc, 0x720, v7
v_mov_b32_e32 v17, 0
v_mov_b32_e32 v50, v10
v_mov_b32_e32 v49, v9
v_mov_b32_e32 v48, v8
v_mov_b32_e32 v15, v10
v_mov_b32_e32 v14, v9
v_mov_b32_e32 v13, v8
s_branch BB7_9
BB7_8:
; implicit-def: %VGPR8
; implicit-def: %VGPR48_VGPR49_VGPR50_VGPR51
; implicit-def: %VGPR6
; implicit-def: %VGPR12
; implicit-def: %VGPR13_VGPR14_VGPR15_VGPR16
; implicit-def: %VGPR7
BB7_9: ; %Flow1256
s_load_dwordx2 s[24:25], s[6:7], 0x20
s_load_dwordx2 s[20:21], s[6:7], 0x1c
s_load_dwordx2 s[16:17], s[6:7], 0x1e
v_cmp_ne_u32_e32 vcc, 0, v17
v_cndmask_b32_e64 v9, 0, 1, vcc
v_mov_b32_e32 v42, v48
v_mov_b32_e32 v32, v48
v_mov_b32_e32 v28, v48
v_mov_b32_e32 v24, v48
v_mov_b32_e32 v20, v48
v_mov_b32_e32 v16, v48
v_cmp_ne_u32_e32 vcc, 1, v9
s_movk_i32 s5, 0x620
s_add_i32 s15, s14, s5
s_and_b64 vcc, exec, vcc
v_mov_b32_e32 v43, v49
v_mov_b32_e32 v44, v50
v_mov_b32_e32 v45, v51
v_mov_b32_e32 v33, v49
v_mov_b32_e32 v34, v50
v_mov_b32_e32 v35, v51
v_mov_b32_e32 v29, v49
v_mov_b32_e32 v30, v50
v_mov_b32_e32 v31, v51
v_mov_b32_e32 v25, v49
v_mov_b32_e32 v26, v50
v_mov_b32_e32 v27, v51
v_mov_b32_e32 v21, v49
v_mov_b32_e32 v22, v50
v_mov_b32_e32 v23, v51
v_mov_b32_e32 v17, v49
v_mov_b32_e32 v18, v50
v_mov_b32_e32 v19, v51
s_waitcnt lgkmcnt(0)
s_mov_b64 vcc, vcc
s_cbranch_vccnz BB7_176
; BB#10: ; %.lr.ph
v_or_b32_e32 v6, 4, v1
v_mov_b32_e32 v13, 0
v_cmp_eq_u32_e32 vcc, 4, v6
v_cmp_gt_u32_e64 s[2:3], 4, v2
s_and_b64 s[48:49], s[2:3], vcc
v_add_i32_e32 v6, vcc, v1, v2
v_and_b32_e32 v8, 4, v1
s_load_dwordx2 s[22:23], s[6:7], 0x30
s_load_dword s19, s[6:7], 0x6
s_load_dword s26, s[6:7], 0x9
s_load_dword s27, s[6:7], 0xa
s_load_dword s42, s[6:7], 0xf
s_load_dword s43, s[6:7], 0x12
v_mov_b32_e32 v14, v13
v_mov_b32_e32 v15, v13
v_mov_b32_e32 v19, v16
s_add_i32 s8, s14, 0x400
v_lshlrev_b32_e32 v6, 2, v6
v_lshlrev_b32_e32 v8, 2, v8
v_add_i32_e32 v10, vcc, s8, v6
v_lshlrev_b32_e32 v6, 2, v0
v_add_i32_e32 v54, vcc, s8, v8
v_lshlrev_b32_e32 v8, 4, v2
v_mov_b32_e32 v18, v15
v_mov_b32_e32 v17, v14
v_mov_b32_e32 v16, v13
v_add_i32_e32 v12, vcc, s14, v6
v_mov_b32_e32 v23, v16
v_mov_b32_e32 v27, v16
v_mov_b32_e32 v31, v16
v_mov_b32_e32 v35, v16
v_mov_b32_e32 v45, v16
v_mov_b32_e32 v51, v16
v_mul_f32_e64 v9, s18, s18
v_mov_b32_e32 v47, 0
v_add_i32_e32 v55, vcc, s14, v8
v_lshlrev_b32_e32 v8, 3, v2
v_add_i32_e32 v56, vcc, s4, v8
s_mov_b32 s46, 0
v_and_b32_e32 v52, 31, v0
v_mov_b32_e32 v53, v47
v_cmp_gt_u32_e64 s[2:3], v1, v2
v_mul_f32_e32 v37, s18, v9
v_add_i32_e32 v6, vcc, s5, v12
v_add_i32_e32 v7, vcc, 0x720, v12
v_add_i32_e32 v12, vcc, 0x820, v12
s_mov_b32 s47, 0xf000
s_mov_b64 s[44:45], 0
s_brev_b32 s50, -2
s_mov_b32 s51, 0x7ffff000
s_brev_b32 s52, 1
v_ashrrev_i32_e32 v58, 31, v38
v_mov_b32_e32 v57, v38
v_or_b32_e32 v38, 7, v41
v_or_b32_e32 v59, 6, v41
v_or_b32_e32 v60, 5, v41
v_or_b32_e32 v61, 4, v41
v_or_b32_e32 v62, 3, v41
v_or_b32_e32 v63, 2, v41
v_or_b32_e32 v64, 1, v41
v_mov_b32_e32 v22, v15
v_mov_b32_e32 v21, v14
v_mov_b32_e32 v20, v13
v_mov_b32_e32 v26, v15
v_mov_b32_e32 v25, v14
v_mov_b32_e32 v24, v13
v_mov_b32_e32 v30, v15
v_mov_b32_e32 v29, v14
v_mov_b32_e32 v28, v13
v_mov_b32_e32 v34, v15
v_mov_b32_e32 v33, v14
v_mov_b32_e32 v32, v13
v_mov_b32_e32 v44, v15
v_mov_b32_e32 v43, v14
v_mov_b32_e32 v42, v13
v_mov_b32_e32 v50, v15
v_mov_b32_e32 v49, v14
v_mov_b32_e32 v48, v13
v_mov_b32_e32 v8, v13
s_waitcnt lgkmcnt(0)
BB7_11: ; =>This Loop Header: Depth=1
; Child Loop BB7_51 Depth 2
; Child Loop BB7_91 Depth 2
; Child Loop BB7_131 Depth 2
; Child Loop BB7_171 Depth 2
v_lshl_b64 v[65:66], v[57:58], 5
v_add_i32_e32 v67, vcc, s40, v65
v_mov_b32_e32 v19, s41
v_addc_u32_e32 v66, vcc, v66, v19, vcc
v_lshl_b64 v[68:69], v[46:47], 3
v_add_i32_e32 v67, vcc, v67, v68
v_addc_u32_e32 v68, vcc, v66, v69, vcc
buffer_load_dwordx2 v[65:66], v[67:68], s[44:47], 0 addr64 offset:16
s_waitcnt vmcnt(0)
v_cmp_ne_u32_e32 vcc, 0, v65
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[54:55], exec, s[4:5]
; mask branch BB7_175
s_cbranch_execz BB7_175
BB7_12: ; in Loop: Header=BB7_11 Depth=1
v_ashrrev_i32_e32 v68, 31, v66
v_mov_b32_e32 v67, v66
v_lshl_b64 v[66:67], v[67:68], 7
v_add_i32_e32 v68, vcc, s22, v66
v_mov_b32_e32 v19, s23
v_addc_u32_e32 v67, vcc, v67, v19, vcc
v_lshl_b64 v[69:70], v[52:53], 2
v_add_i32_e32 v68, vcc, v68, v69
v_addc_u32_e32 v69, vcc, v67, v70, vcc
buffer_load_dword v19, v[68:69], s[44:47], 0 addr64
s_and_saveexec_b64 s[4:5], s[48:49]
s_xor_b64 s[4:5], exec, s[4:5]
s_waitcnt vmcnt(0)
; mask branch BB7_14
s_cbranch_execz BB7_14
BB7_13: ; in Loop: Header=BB7_11 Depth=1
v_lshl_b64 v[66:67], v[57:58], 5
v_add_i32_e32 v68, vcc, s40, v66
v_mov_b32_e32 v23, s41
v_addc_u32_e32 v67, vcc, v67, v23, vcc
v_lshl_b64 v[69:70], v[2:3], 2
v_add_i32_e32 v68, vcc, v68, v69
v_addc_u32_e32 v69, vcc, v67, v70, vcc
buffer_load_dword v23, v[68:69], s[44:47], 0 addr64
s_mov_b32 m0, -1
s_waitcnt vmcnt(0)
ds_write_b32 v10, v23
s_waitcnt lgkmcnt(0)
BB7_14: ; %.preheader.preheader
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[4:5]
v_and_b32_e32 v23, 0xff, v65
v_cmp_ne_u32_e32 vcc, 0, v23
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[56:57], exec, s[4:5]
; mask branch BB7_54
s_cbranch_execz BB7_54
BB7_15: ; in Loop: Header=BB7_11 Depth=1
s_mov_b32 m0, -1
ds_read_b32 v23, v54
s_mov_b64 s[34:35], s[46:47]
s_mov_b64 s[38:39], s[46:47]
s_waitcnt lgkmcnt(0)
v_lshlrev_b32_e32 v27, 3, v23
v_add_i32_e32 v66, vcc, v27, v1
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 4
v_lshl_b64 v[70:71], v[66:67], 3
buffer_load_dwordx4 v[75:78], v[68:69], s[32:35], 0 addr64
buffer_load_dwordx2 v[67:68], v[70:71], s[36:39], 0 addr64
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v65
v_cmp_eq_u32_e32 vcc, 1, v27
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v74, v69
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
s_waitcnt vmcnt(0)
; mask branch BB7_19
s_cbranch_execz BB7_19
BB7_16: ; in Loop: Header=BB7_11 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset1:1
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v27, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v31, v75, v79
v_mul_f32_e32 v45, v27, v27
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v31, v31
v_mov_b32_e32 v69, 0
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v74, v69
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_18
s_cbranch_execz BB7_18
BB7_17: ; in Loop: Header=BB7_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v71, v45
v_and_b32_e32 v69, 1, v19
v_cmp_eq_u32_e32 vcc, 1, v69
v_cndmask_b32_e64 v72, 0, 1.0, vcc
v_mul_f32_e32 v73, v71, v71
v_mul_f32_e32 v69, v73, v73
s_mov_b32 m0, -1
v_mul_f32_e32 v74, v72, v69
ds_read_b64 v[69:70], v56
v_mul_f32_e32 v79, v73, v74
v_mad_f32 v74, v74, v73, s42
v_mad_f32 v81, v79, v79, s43
v_mul_f32_e32 v74, 0xbe2aaaab, v74
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v70, v68, v70
v_mul_f32_e32 v80, v79, v70
v_mad_f32 v80, v69, v67, -v80
v_mul_f32_e32 v69, v67, v69
v_mul_f32_e32 v70, v81, v70
v_mul_f32_e32 v69, v69, v74
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v69, 0x3daaaaaa, v70
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v74, v72, v70
v_mac_f32_e32 v8, v69, v74
v_mul_f32_e32 v69, v9, v45
v_mul_f32_e32 v74, v69, v69
v_mov_b32_e32 v81, 0x3a92b707
v_madak_f32_e32 v81, v81, v74, 0x3ded3cb2
v_mul_f32_e32 v51, v78, v82
v_mov_b32_e32 v82, 0x3c739487
v_madak_f32_e32 v82, v82, v74, 0x3f01e2bc
v_mad_f32 v81, v81, v74, 1.0
v_mac_f32_e32 v81, v69, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v74, 0xb85ffb93
v_mov_b32_e32 v83, 0x35c55945
v_mul_f32_e32 v45, s18, v45
v_madak_f32_e32 v83, v83, v74, 0x3a83ca0c
v_mul_f32_e32 v45, v71, v45
v_madak_f32_e32 v82, v82, v74, 0xbc9ded90
v_madak_f32_e32 v83, v83, v74, 0x3d8eaf3b
v_madak_f32_e32 v74, v82, v74, 0xbf409397
v_and_b32_e32 v82, s50, v45
v_mov_b32_e32 v84, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v84, v82
v_mul_f32_e32 v84, v82, v82
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v82
v_mov_b32_e32 v87, 0xbd777f97
v_and_b32_e32 v89, s51, v45
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v82
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v82
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb7c756b1
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_madak_f32_e32 v87, v84, v87, 0x3e0375d4
v_cndmask_b32_e64 v85, v85, v87, s[8:9]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
v_mad_f32 v84, v84, v86, 1.0
v_mov_b32_e32 v86, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v84|, v86
v_mov_b32_e32 v87, 0x2f800000
v_cndmask_b32_e32 v88, 1.0, v87, vcc
v_mul_f32_e32 v84, v88, v84
v_mov_b32_e32 v90, 0xbf100000
v_rcp_f32_e32 v84, v84
v_mad_f32 v90, v89, -v89, v90
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mov_b32_e32 v92, 0x3fb8aa3b
v_mac_f32_e32 v91, v92, v90
v_mul_f32_e32 v84, v84, v85
v_cvt_i32_f32_e32 v91, v91
v_subrev_f32_e32 v105, v82, v89
v_mul_f32_e32 v93, v84, v88
v_add_f32_e32 v89, v82, v89
v_mad_f32 v89, v89, v105, v93
v_cmp_gt_f32_e64 s[10:11], 0, v89
v_cndmask_b32_e64 v105, 0.5, -0.5, s[10:11]
v_cvt_f32_i32_e32 v85, v91
v_mac_f32_e32 v105, v92, v89
v_cvt_i32_f32_e32 v92, v105
v_mov_b32_e32 v94, 0xbf317180
v_mad_f32 v95, v94, v85, v90
v_mov_b32_e32 v96, 0xb717f7d1
v_mad_f32 v97, v96, v85, v95
v_mul_f32_e32 v98, v97, v97
v_mov_b32_e32 v99, 0xb5ddea0e
v_mov_b32_e32 v100, 0x3331bb4c
v_cvt_f32_i32_e32 v106, v92
v_mad_f32 v101, v100, v98, v99
v_mov_b32_e32 v102, 0x388ab355
v_mad_f32 v101, v101, v98, v102
v_mov_b32_e32 v103, 0xbb360b61
v_mad_f32 v101, v101, v98, v103
v_mov_b32_e32 v104, 0x3e2aaaab
v_mad_f32 v101, v101, v98, v104
v_mac_f32_e32 v74, v69, v83
v_mad_f32 v83, v94, v106, v89
v_mad_f32 v98, -v98, v101, v97
v_mad_f32 v94, v96, v106, v83
v_mul_f32_e32 v69, v98, v97
v_mul_f32_e32 v97, v94, v94
v_mac_f32_e32 v99, v100, v97
v_sub_f32_e32 v101, 2.0, v98
v_mac_f32_e32 v102, v99, v97
v_cmp_gt_f32_e64 vcc, |v101|, v86
v_mac_f32_e32 v103, v102, v97
v_mac_f32_e32 v104, v103, v97
v_cndmask_b32_e32 v105, 1.0, v87, vcc
v_mul_f32_e32 v101, v105, v101
v_mad_f32 v97, -v97, v104, v94
v_rcp_f32_e32 v101, v101
v_sub_f32_e32 v98, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v98|, v86
v_rcp_f32_e32 v81, v81
v_cndmask_b32_e32 v99, 1.0, v87, vcc
v_mul_f32_e32 v98, v99, v98
v_mul_f32_e32 v69, v101, v69
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v69, v69, v105
v_mad_f32 v69, -v85, v96, -v69
v_mul_f32_e32 v81, v37, v81
v_mul_f32_e32 v85, v97, v94
v_subrev_f32_e32 v69, v95, v69
v_mul_f32_e32 v74, v74, v81
v_mul_f32_e32 v81, v72, v73
v_mul_f32_e32 v85, v98, v85
v_mac_f32_e32 v74, v71, v81
v_lshlrev_b32_e32 v81, 23, v91
v_sub_f32_e32 v69, 1.0, v69
v_mul_f32_e32 v85, v85, v99
v_add_i32_e32 v69, vcc, v69, v81
v_mov_b32_e32 v81, 0xc2aeac4f
v_mad_f32 v85, -v106, v96, -v85
v_cmp_ge_f32_e32 vcc, v90, v81
v_mov_b32_e32 v91, 0x42b17218
v_subrev_f32_e32 v83, v83, v85
v_cndmask_b32_e32 v69, 0, v69, vcc
v_cmp_lt_f32_e32 vcc, v90, v91
v_mov_b32_e32 v94, 0x7f800000
v_cndmask_b32_e32 v69, v94, v69, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_sub_f32_e32 v83, 1.0, v83
v_lshlrev_b32_e32 v85, 23, v92
v_cndmask_b32_e32 v69, v69, v90, vcc
v_add_i32_e32 v83, vcc, v83, v85
v_cmp_ge_f32_e32 vcc, v89, v81
v_cndmask_b32_e32 v81, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v89, v91
v_cndmask_b32_e32 v81, v94, v81, vcc
v_cmp_u_f32_e32 vcc, v89, v89
v_cndmask_b32_e32 v81, v81, v89, vcc
v_mul_f32_e32 v69, v81, v69
v_mov_b32_e32 v81, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v81, v82
v_mov_b32_e32 v81, 0x31800000
v_cmp_gt_f32_e64 vcc, |v82|, v86
v_cmp_gt_f32_e64 s[12:13], v81, v82
v_cndmask_b32_e32 v81, 1.0, v87, vcc
v_mul_f32_e32 v82, v81, v82
v_rcp_f32_e32 v82, v82
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v69, v82, v69
v_mad_f32 v69, -v81, v69, 1.0
v_madak_f32_e32 v81, v88, v84, 0x3f58560b
v_cndmask_b32_e64 v69, 1.0, v69, s[10:11]
v_cndmask_b32_e64 v69, v69, v81, s[4:5]
v_and_b32_e32 v81, s52, v45
v_or_b32_e32 v69, v81, v69
v_mad_f32 v81, v93, v45, v45
v_cndmask_b32_e64 v69, v69, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v69, v69, v81, s[12:13]
v_cndmask_b32_e32 v45, v69, v45, vcc
v_subrev_f32_e32 v45, v45, v72
v_mul_f32_e32 v69, s19, v72
v_mad_f32 v45, v71, v45, -v69
v_mul_f32_e32 v69, v70, v73
v_mul_f32_e32 v69, v79, v69
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v80, v69
v_mad_f32 v45, v51, v74, -v45
v_mul_f32_e32 v51, v74, v51
v_mad_f32 v51, v69, v80, -v51
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
v_mul_f32_e64 v74, v45, -v35
v_mul_f32_e64 v71, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB7_18: ; %Flow1253
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[34:35]
BB7_19: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[30:31]
v_lshrrev_b32_e32 v27, 1, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_23
s_cbranch_execz BB7_23
BB7_20: ; in Loop: Header=BB7_11 Depth=1
s_mov_b32 m0, -1
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_22
s_cbranch_execz BB7_22
BB7_21: ; in Loop: Header=BB7_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 1, v19
ds_read_b64 v[72:73], v56 offset:64
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v51, v78, v82
v_mul_f32_e32 v82, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v83, v82, v73
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v72, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v84, v73
v_mul_f32_e32 v72, v72, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v72, 0x3daaaaaa, v73
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v81, v70, v73
v_mac_f32_e32 v8, v72, v81
v_mul_f32_e32 v72, v9, v45
v_mul_f32_e32 v81, v72, v72
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v72, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v72, v86
v_rcp_f32_e32 v72, v84
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
v_mul_f32_e32 v72, v37, v72
v_mul_f32_e32 v72, v81, v72
v_mul_f32_e32 v81, v70, v80
v_mac_f32_e32 v72, v79, v81
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v73, v80
v_mul_f32_e32 v70, v82, v70
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v83, v70
v_mad_f32 v45, v51, v72, -v45
v_mul_f32_e32 v51, v72, v51
v_mad_f32 v51, v70, v83, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB7_22: ; %Flow1252
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[30:31]
BB7_23: ; in Loop: Header=BB7_11 Depth=1
; Join: OR the lane mask held in s[12:13] back into exec, then test bit 2 of v65.
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 2) & 1) == 1
v_lshrrev_b32_e32 v27, 2, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = lanes just switched off
; (they are OR-ed back into exec at BB7_27).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_27
; Skip straight to BB7_27 when no lane is left active.
s_cbranch_execz BB7_27
BB7_24: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v27, v31, v35) = (v79 - v75, v80 - v76, v81 - v77) and its squared length v45,
; and keeps active only the lanes with v45 < v51.
; NOTE(review): v51 looks like a squared cut-off (s26) gated by a pair-exclusion mask - confirm.
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: byte offsets 256 and 264 from v55.
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; s[4:5] = ~(exec & s[0:1]) | (v63 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before using v[79:82].
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
; v51 = s26 * (s[4:5] ? 1.0 : 0)
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = previous exec, exec &= vcc; s[30:31] = lanes just switched off
; (they are OR-ed back into exec at BB7_26).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_26
s_cbranch_execz BB7_26
BB7_25: ; in Loop: Header=BB7_11 Depth=1
; Straight-line per-lane arithmetic on the squared length v45 and the difference
; vector (v27, v31, v35). Adds into the scalar sums v8 and v5 and into the vector
; sums (v74, v71, v69) and (v34, v33, v32).
; NOTE(review): the shape resembles a Lennard-Jones + Ewald-electrostatics pair
; interaction (energies in v8/v5, forces in the vector sums) - confirm against the kernel source.
; Clamp v45 from below by 0x34cd15ae (~3.8e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v70 = 1.0 if bit 2 of v19 is set, else 0; used below as a multiplicative mask.
v_lshrrev_b32_e32 v70, 2, v19
ds_read_b64 v[72:73], v56 offset:128
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
; v80 = v79^2, v81 = v70 * v80^2
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
; v51 = v78 * v82 uses the v82 that is live on entry; the next line overwrites
; v82 with v70 * v80^3.
v_mul_f32_e32 v51, v78, v82
v_mul_f32_e32 v82, v80, v81
; Wait for the ds_read_b64 result in v[72:73].
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v83, v82, v73
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v72, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v72, v67, v72
; 0xbe2aaaab ~ -1/6 and 0x3daaaaaa ~ 1/12.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v84, v73
v_mul_f32_e32 v72, v72, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v8 += v72 * v70 * (s27 > v45 ? 1.0 : 0)
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v81, v70, v73
v_mac_f32_e32 v8, v72, v81
; Rational function of v72 = v9 * v45 (with v81 = v72^2): denominator built in v84,
; numerator in v81, then v72 = 1 / denominator.
v_mul_f32_e32 v72, v9, v45
v_mul_f32_e32 v81, v72, v72
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v72, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v72, v86
v_rcp_f32_e32 v72, v84
; v45 = s18 * v45 * v79: the argument of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
; v72 = v37 * numerator / denominator + v70 * v79^3
v_mul_f32_e32 v72, v37, v72
v_mul_f32_e32 v72, v81, v72
v_mul_f32_e32 v81, v70, v80
v_mac_f32_e32 v72, v79, v81
; NOTE(review): the range limits used from here on (0.84375, 1.25, ~2.857, 6.0, 2^-28)
; and the two exp-style range reductions look like an inlined single-precision
; erf() of v45 - confirm against the OpenCL source / device library.
; v81 = v45 & s50 (presumably |v45|; s50 is set outside this view).
v_and_b32_e32 v81, s50, v45
; s[4:5] = (1.25 > v81)
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; v88 = 0x6f800000 (2^96): magnitude limit tested before each scaled reciprocal.
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
; s[8:9] = (0.84375 > v81)
v_cmp_gt_f32_e64 s[8:9], v86, v81
; Polynomial variable: v84 = s[8:9] ? v81^2 : (s[4:5] ? v81 - 1.0 : 1 / v81^2)
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Horner chains in v84 follow; the v_cndmask instructions pick the coefficient
; set per range (vcc = 0x4036db6e (~2.857) > v81, s[4:5], s[8:9]).
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Divisor v86 = 1.0 + v84 * (selected chain). v90 = 2^-32 (0x2f800000) when
; |v86| > 2^96, else 1.0; the divisor is multiplied by v90 before v_rcp.
; Interleaved with it: first exp-style evaluation, on v92 = -(v91 * v91) - 0.5625
; with v91 = v45 & s51: n = round(v92 * 0x3fb8aa3b (~log2 e)), remainder
; v92 + n * 0xbf317180 + n * 0xb717f7d1 (a split -ln 2), polynomial in the
; remainder, and finally n << 23 added to the result bits.
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
; v84 = selected numerator * 1 / (v90 * divisor)
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling for the first result (v86): 0 unless v92 >= 0xc2aeac4f (~-87.3),
; +inf (0x7f800000) unless v92 < 0x42b17218 (~88.7), v92 itself when v92 is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exp-style evaluation (same scheme, result in v85), on
; v91 = (v81 + v91) * (v91 - v81) + v97, where v97 = v84 * v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = v90 * v84 + 0x3f58560b (~0.845063)
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; Final selection. Base value: 1.0 - (v85 * v86) / v81 when 6.0 > v81, else 1.0;
; replaced by v84 when 1.25 > v81 (s[4:5]); then OR-ed with (v45 & s52);
; replaced by v45 + v97 * v45 when 0.84375 > v81 (s[8:9]);
; replaced by 0.125 * (0x3f8375d4 * v45 + 8.0 * v45) when 2^-28 > v81 (s[10:11]);
; and v45 is kept unchanged when it is NaN.
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; v45 = v79 * (v70 - v45) - s19 * v70, then v5 += v45 * v51.
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
; v70 = v82 * v73 * v80 (v73 is the 1.0/0 result of the s27 > v45 test above).
v_mul_f32_e32 v70, v73, v80
v_mul_f32_e32 v70, v82, v70
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v72 - v83 * v70; v51 = v70 * v83 - v72 * v51 (same two products,
; opposite sign).
v_mul_f32_e32 v45, v83, v70
v_mad_f32 v45, v51, v72, -v45
v_mul_f32_e32 v51, v72, v51
v_mad_f32 v51, v70, v83, -v51
; (v74, v71, v69) += (v35, v31, v27) * v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v34, v33, v32) += (v35, v31, v27) * v45
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB7_26: ; %Flow1251
; in Loop: Header=BB7_11 Depth=1
; Join for the `v45 < v51` test in BB7_24: OR the lanes parked in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB7_27: ; in Loop: Header=BB7_11 Depth=1
; Join for the bit-2 test in BB7_23 (restores the lanes parked in s[12:13]),
; then test bit 3 of v65.
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 3) & 1) == 1
v_lshrrev_b32_e32 v27, 3, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = lanes just switched off
; (they are OR-ed back into exec at BB7_31).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_31
; Skip straight to BB7_31 when no lane is left active.
s_cbranch_execz BB7_31
BB7_28: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v27, v31, v35) = (v79 - v75, v80 - v76, v81 - v77) and its squared length v45,
; and keeps active only the lanes with v45 < v51.
; NOTE(review): v51 looks like a squared cut-off (s26) gated by a pair-exclusion mask - confirm.
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: byte offsets 384 and 392 from v55.
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; s[4:5] = ~(exec & s[0:1]) | (v62 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before using v[79:82].
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
; v51 = s26 * (s[4:5] ? 1.0 : 0)
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = previous exec, exec &= vcc; s[30:31] = lanes just switched off
; (they are OR-ed back into exec at BB7_30).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_30
s_cbranch_execz BB7_30
BB7_29: ; in Loop: Header=BB7_11 Depth=1
; Straight-line per-lane arithmetic on the squared length v45 and the difference
; vector (v27, v31, v35). Adds into the scalar sums v8 and v5 and into the vector
; sums (v74, v71, v69) and (v30, v29, v28).
; NOTE(review): the shape resembles a Lennard-Jones + Ewald-electrostatics pair
; interaction (energies in v8/v5, forces in the vector sums) - confirm against the kernel source.
; Clamp v45 from below by 0x34cd15ae (~3.8e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v70 = 1.0 if bit 3 of v19 is set, else 0; used below as a multiplicative mask.
v_lshrrev_b32_e32 v70, 3, v19
ds_read_b64 v[72:73], v56 offset:192
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
; v80 = v79^2, v81 = v70 * v80^2
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
; v51 = v78 * v82 uses the v82 that is live on entry; the next line overwrites
; v82 with v70 * v80^3.
v_mul_f32_e32 v51, v78, v82
v_mul_f32_e32 v82, v80, v81
; Wait for the ds_read_b64 result in v[72:73].
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v83, v82, v73
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v72, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v72, v67, v72
; 0xbe2aaaab ~ -1/6 and 0x3daaaaaa ~ 1/12.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v84, v73
v_mul_f32_e32 v72, v72, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v8 += v72 * v70 * (s27 > v45 ? 1.0 : 0)
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v81, v70, v73
v_mac_f32_e32 v8, v72, v81
; Rational function of v72 = v9 * v45 (with v81 = v72^2): denominator built in v84,
; numerator in v81, then v72 = 1 / denominator.
v_mul_f32_e32 v72, v9, v45
v_mul_f32_e32 v81, v72, v72
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v72, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v72, v86
v_rcp_f32_e32 v72, v84
; v45 = s18 * v45 * v79: the argument of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
; v72 = v37 * numerator / denominator + v70 * v79^3
v_mul_f32_e32 v72, v37, v72
v_mul_f32_e32 v72, v81, v72
v_mul_f32_e32 v81, v70, v80
v_mac_f32_e32 v72, v79, v81
; NOTE(review): the range limits used from here on (0.84375, 1.25, ~2.857, 6.0, 2^-28)
; and the two exp-style range reductions look like an inlined single-precision
; erf() of v45 - confirm against the OpenCL source / device library.
; v81 = v45 & s50 (presumably |v45|; s50 is set outside this view).
v_and_b32_e32 v81, s50, v45
; s[4:5] = (1.25 > v81)
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; v88 = 0x6f800000 (2^96): magnitude limit tested before each scaled reciprocal.
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
; s[8:9] = (0.84375 > v81)
v_cmp_gt_f32_e64 s[8:9], v86, v81
; Polynomial variable: v84 = s[8:9] ? v81^2 : (s[4:5] ? v81 - 1.0 : 1 / v81^2)
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Horner chains in v84 follow; the v_cndmask instructions pick the coefficient
; set per range (vcc = 0x4036db6e (~2.857) > v81, s[4:5], s[8:9]).
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Divisor v86 = 1.0 + v84 * (selected chain). v90 = 2^-32 (0x2f800000) when
; |v86| > 2^96, else 1.0; the divisor is multiplied by v90 before v_rcp.
; Interleaved with it: first exp-style evaluation, on v92 = -(v91 * v91) - 0.5625
; with v91 = v45 & s51: n = round(v92 * 0x3fb8aa3b (~log2 e)), remainder
; v92 + n * 0xbf317180 + n * 0xb717f7d1 (a split -ln 2), polynomial in the
; remainder, and finally n << 23 added to the result bits.
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
; v84 = selected numerator * 1 / (v90 * divisor)
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling for the first result (v86): 0 unless v92 >= 0xc2aeac4f (~-87.3),
; +inf (0x7f800000) unless v92 < 0x42b17218 (~88.7), v92 itself when v92 is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exp-style evaluation (same scheme, result in v85), on
; v91 = (v81 + v91) * (v91 - v81) + v97, where v97 = v84 * v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = v90 * v84 + 0x3f58560b (~0.845063)
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; Final selection. Base value: 1.0 - (v85 * v86) / v81 when 6.0 > v81, else 1.0;
; replaced by v84 when 1.25 > v81 (s[4:5]); then OR-ed with (v45 & s52);
; replaced by v45 + v97 * v45 when 0.84375 > v81 (s[8:9]);
; replaced by 0.125 * (0x3f8375d4 * v45 + 8.0 * v45) when 2^-28 > v81 (s[10:11]);
; and v45 is kept unchanged when it is NaN.
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; v45 = v79 * (v70 - v45) - s19 * v70, then v5 += v45 * v51.
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
; v70 = v82 * v73 * v80 (v73 is the 1.0/0 result of the s27 > v45 test above).
v_mul_f32_e32 v70, v73, v80
v_mul_f32_e32 v70, v82, v70
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v72 - v83 * v70; v51 = v70 * v83 - v72 * v51 (same two products,
; opposite sign).
v_mul_f32_e32 v45, v83, v70
v_mad_f32 v45, v51, v72, -v45
v_mul_f32_e32 v51, v72, v51
v_mad_f32 v51, v70, v83, -v51
; (v74, v71, v69) += (v35, v31, v27) * v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v30, v29, v28) += (v35, v31, v27) * v45
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
BB7_30: ; %Flow1250
; in Loop: Header=BB7_11 Depth=1
; Join for the `v45 < v51` test in BB7_28: OR the lanes parked in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB7_31: ; in Loop: Header=BB7_11 Depth=1
; Join for the bit-3 test in BB7_27 (restores the lanes parked in s[12:13]),
; then test bit 4 of v65.
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 4) & 1) == 1
v_lshrrev_b32_e32 v27, 4, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = lanes just switched off
; (they are OR-ed back into exec at BB7_35).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_35
; Skip straight to BB7_35 when no lane is left active.
s_cbranch_execz BB7_35
BB7_32: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v27, v31, v35) = (v79 - v75, v80 - v76, v81 - v77) and its squared length v45,
; and keeps active only the lanes with v45 < v51.
; NOTE(review): v51 looks like a squared cut-off (s26) gated by a pair-exclusion mask - confirm.
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: byte offsets 512 and 520 from v55.
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; s[4:5] = ~(exec & s[0:1]) | (v61 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before using v[79:82].
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
; v51 = s26 * (s[4:5] ? 1.0 : 0)
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = previous exec, exec &= vcc; s[30:31] = lanes just switched off
; (they are OR-ed back into exec at BB7_34).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_34
s_cbranch_execz BB7_34
BB7_33: ; in Loop: Header=BB7_11 Depth=1
; Straight-line per-lane arithmetic on the squared length v45 and the difference
; vector (v27, v31, v35). Adds into the scalar sums v8 and v5 and into the vector
; sums (v74, v71, v69) and (v26, v25, v24).
; NOTE(review): the shape resembles a Lennard-Jones + Ewald-electrostatics pair
; interaction (energies in v8/v5, forces in the vector sums) - confirm against the kernel source.
; Clamp v45 from below by 0x34cd15ae (~3.8e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v70 = 1.0 if bit 4 of v19 is set, else 0; used below as a multiplicative mask.
v_lshrrev_b32_e32 v70, 4, v19
ds_read_b64 v[72:73], v56 offset:256
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
; v80 = v79^2, v81 = v70 * v80^2
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
; v51 = v78 * v82 uses the v82 that is live on entry; the next line overwrites
; v82 with v70 * v80^3.
v_mul_f32_e32 v51, v78, v82
v_mul_f32_e32 v82, v80, v81
; Wait for the ds_read_b64 result in v[72:73].
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v83, v82, v73
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v72, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v72, v67, v72
; 0xbe2aaaab ~ -1/6 and 0x3daaaaaa ~ 1/12.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v84, v73
v_mul_f32_e32 v72, v72, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v72, 0x3daaaaaa, v73
; v8 += v72 * v70 * (s27 > v45 ? 1.0 : 0)
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v81, v70, v73
v_mac_f32_e32 v8, v72, v81
; Rational function of v72 = v9 * v45 (with v81 = v72^2): denominator built in v84,
; numerator in v81, then v72 = 1 / denominator.
v_mul_f32_e32 v72, v9, v45
v_mul_f32_e32 v81, v72, v72
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v72, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v72, v86
v_rcp_f32_e32 v72, v84
; v45 = s18 * v45 * v79: the argument of the piecewise evaluation below.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
; v72 = v37 * numerator / denominator + v70 * v79^3
v_mul_f32_e32 v72, v37, v72
v_mul_f32_e32 v72, v81, v72
v_mul_f32_e32 v81, v70, v80
v_mac_f32_e32 v72, v79, v81
; NOTE(review): the range limits used from here on (0.84375, 1.25, ~2.857, 6.0, 2^-28)
; and the two exp-style range reductions look like an inlined single-precision
; erf() of v45 - confirm against the OpenCL source / device library.
; v81 = v45 & s50 (presumably |v45|; s50 is set outside this view).
v_and_b32_e32 v81, s50, v45
; s[4:5] = (1.25 > v81)
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; v88 = 0x6f800000 (2^96): magnitude limit tested before each scaled reciprocal.
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
; s[8:9] = (0.84375 > v81)
v_cmp_gt_f32_e64 s[8:9], v86, v81
; Polynomial variable: v84 = s[8:9] ? v81^2 : (s[4:5] ? v81 - 1.0 : 1 / v81^2)
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Horner chains in v84 follow; the v_cndmask instructions pick the coefficient
; set per range (vcc = 0x4036db6e (~2.857) > v81, s[4:5], s[8:9]).
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Divisor v86 = 1.0 + v84 * (selected chain). v90 = 2^-32 (0x2f800000) when
; |v86| > 2^96, else 1.0; the divisor is multiplied by v90 before v_rcp.
; Interleaved with it: first exp-style evaluation, on v92 = -(v91 * v91) - 0.5625
; with v91 = v45 & s51: n = round(v92 * 0x3fb8aa3b (~log2 e)), remainder
; v92 + n * 0xbf317180 + n * 0xb717f7d1 (a split -ln 2), polynomial in the
; remainder, and finally n << 23 added to the result bits.
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
; v84 = selected numerator * 1 / (v90 * divisor)
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling for the first result (v86): 0 unless v92 >= 0xc2aeac4f (~-87.3),
; +inf (0x7f800000) unless v92 < 0x42b17218 (~88.7), v92 itself when v92 is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exp-style evaluation (same scheme, result in v85), on
; v91 = (v81 + v91) * (v91 - v81) + v97, where v97 = v84 * v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = v90 * v84 + 0x3f58560b (~0.845063)
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; Final selection. Base value: 1.0 - (v85 * v86) / v81 when 6.0 > v81, else 1.0;
; replaced by v84 when 1.25 > v81 (s[4:5]); then OR-ed with (v45 & s52);
; replaced by v45 + v97 * v45 when 0.84375 > v81 (s[8:9]);
; replaced by 0.125 * (0x3f8375d4 * v45 + 8.0 * v45) when 2^-28 > v81 (s[10:11]);
; and v45 is kept unchanged when it is NaN.
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; v45 = v79 * (v70 - v45) - s19 * v70, then v5 += v45 * v51.
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
; v70 = v82 * v73 * v80 (v73 is the 1.0/0 result of the s27 > v45 test above).
v_mul_f32_e32 v70, v73, v80
v_mul_f32_e32 v70, v82, v70
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v72 - v83 * v70; v51 = v70 * v83 - v72 * v51 (same two products,
; opposite sign).
v_mul_f32_e32 v45, v83, v70
v_mad_f32 v45, v51, v72, -v45
v_mul_f32_e32 v51, v72, v51
v_mad_f32 v51, v70, v83, -v51
; (v74, v71, v69) += (v35, v31, v27) * v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v26, v25, v24) += (v35, v31, v27) * v45
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
BB7_34: ; %Flow1249
; in Loop: Header=BB7_11 Depth=1
; Join for the `v45 < v51` test in BB7_32: OR the lanes parked in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB7_35: ; in Loop: Header=BB7_11 Depth=1
; Join for the bit-4 test in BB7_31 (restores the lanes parked in s[12:13]),
; then test bit 5 of v65.
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 5) & 1) == 1
v_lshrrev_b32_e32 v27, 5, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = lanes just switched off.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_39
; Skip straight to BB7_39 when no lane is left active.
s_cbranch_execz BB7_39
BB7_36: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS into v[79:82], forms the difference vector
; (v27, v31, v35) = (v79 - v75, v80 - v76, v81 - v77) and its squared length v45,
; and keeps active only the lanes with v45 < v51.
; NOTE(review): v51 looks like a squared cut-off (s26) gated by a pair-exclusion mask - confirm.
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: byte offsets 640 and 648 from v55.
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; s[4:5] = ~(exec & s[0:1]) | (v60 != v23) | (exec & s[2:3])
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS read before using v[79:82].
s_waitcnt lgkmcnt(0)
v_subrev_f32_e32 v31, v76, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
; v51 = s26 * (s[4:5] ? 1.0 : 0)
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v77, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; s[4:5] = previous exec, exec &= vcc; s[30:31] = lanes just switched off.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_38
s_cbranch_execz BB7_38
BB7_37: ; in Loop: Header=BB7_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v70, 5, v19
ds_read_b64 v[72:73], v56 offset:320
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
v_mul_f32_e32 v51, v78, v82
v_mul_f32_e32 v82, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v83, v82, v73
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v72, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v72, v67, v72
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v84, v73
v_mul_f32_e32 v72, v72, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v72, 0x3daaaaaa, v73
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v81, v70, v73
v_mac_f32_e32 v8, v72, v81
v_mul_f32_e32 v72, v9, v45
v_mul_f32_e32 v81, v72, v72
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v72, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v72, v86
v_rcp_f32_e32 v72, v84
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
v_mul_f32_e32 v72, v37, v72
v_mul_f32_e32 v72, v81, v72
v_mul_f32_e32 v81, v70, v80
v_mac_f32_e32 v72, v79, v81
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
v_mul_f32_e32 v70, v73, v80
v_mul_f32_e32 v70, v82, v70
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v83, v70
v_mad_f32 v45, v51, v72, -v45
v_mul_f32_e32 v51, v72, v51
v_mad_f32 v51, v70, v83, -v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v22, v35, v45, v22
v_mad_f32 v21, v31, v45, v21
v_mac_f32_e32 v20, v27, v45
BB7_38: ; %Flow1248
; in Loop: Header=BB7_11 Depth=1
; Join point: OR the lane mask held in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB7_39: ; in Loop: Header=BB7_11 Depth=1
; Join point: OR the lane mask held in s[12:13] back into exec.
s_or_b64 exec, exec, s[12:13]
; Per-lane test of bit 6 of v65: v27 = (v65 >> 6) & 1, vcc = (v27 == 1).
v_lshrrev_b32_e32 v27, 6, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = old exec and exec &= vcc; then s[12:13] = new exec ^ old exec,
; i.e. the lanes switched off here. They are OR-ed back into exec at BB7_43.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_43
; Skip the whole guarded region when no lane is left active.
s_cbranch_execz BB7_43
BB7_40: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS, forms a 3-component difference against v75..v77,
; and keeps only the lanes whose squared distance is below a masked threshold.
; m0 is set to -1 ahead of the DS access (presumably the LDS bound/clamp - confirm for the target).
s_mov_b32 m0, -1
; v[79:82] = 16 contiguous bytes of LDS at v55 + 96*8 (offsets are in 8-byte units).
ds_read2_b64 v[79:82], v55 offset0:96 offset1:97
; s[4:5] = ~(exec & s[0:1]) | (v59 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v59, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v31 = v80 - v76 (v_subrev computes src1 - src0)
v_subrev_f32_e32 v31, v76, v80
; s[4:5] |= (exec & s[2:3])
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v75
v_subrev_f32_e32 v27, v75, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the combined mask s[4:5] is set, else 0
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v77
v_subrev_f32_e32 v35, v77, v81
; v45 = v31^2 + v27^2 + v35^2 (squared length of the difference vector)
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold is s26 for masked-in lanes and 0 otherwise, so masked-out lanes never pass.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; exec &= vcc; s[30:31] = lanes switched off here (OR-ed back into exec at BB7_42).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_42
s_cbranch_execz BB7_42
BB7_41: ; in Loop: Header=BB7_11 Depth=1
; Pair arithmetic for the lanes that passed the distance test in BB7_40.
; Inputs used here: v45 = squared distance, v27/v31/v35 = difference components,
; v82 and v78 (multiplied into v51), v67/v68, v9, v37, s18, s19, s26/s27, s42/s43, s50..s52.
; Outputs: v5 and v8 are accumulated; v69/v71/v74 and v16/v17/v18 receive +/- the same
; scalar times the difference vector.
; NOTE(review): the structure (r^-6 / r^-12 term, rational correction, erf-like piecewise
; rational + exp) looks like a Lennard-Jones + Ewald electrostatics pair kernel - confirm
; against the OpenCL source; the comments below only state what the instructions compute.
;
; Clamp the squared distance from below to 0x34cd15ae (~3.82e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v70 = 1.0 if bit 6 of v19 is set, else 0 (used below as a 0/1 multiplier).
v_lshrrev_b32_e32 v70, 6, v19
; v[72:73] = 8 bytes of LDS at v56 + 384.
ds_read_b64 v[72:73], v56 offset:384
v_and_b32_e32 v70, 1, v70
v_cmp_eq_u32_e32 vcc, 1, v70
; v80 = v79^2, v81 = v70*v80^2, v82 = v70*v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v70, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v70, v81
; v51 = v78 * v82, taken before v82 is reused as a temporary on the next line.
v_mul_f32_e32 v51, v78, v82
v_mul_f32_e32 v82, v80, v81
s_waitcnt lgkmcnt(0)
; v73 = v68*v73 and (below) v72 = v67*v72: the LDS pair scaled by v68/v67.
; v83 = v72*v67 - v82*v73 is kept for the final combination at the end of the block.
v_mul_f32_e32 v73, v68, v73
v_mul_f32_e32 v83, v82, v73
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v72, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v72, v67, v72
; 0xbe2aaaab = -1/6, 0x3daaaaaa ~= 1/12.
; v72 = (v67*v72)*(-1/6)*(v70*v80^3 + s42) + (1/12)*(v82^2 + s43)*(v68*v73)
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v73, v84, v73
v_mul_f32_e32 v72, v72, v81
; v73 = 1.0 where v45 < s27, else 0.
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v72, 0x3daaaaaa, v73
v_cndmask_b32_e64 v73, 0, 1.0, vcc
v_mul_f32_e32 v81, v70, v73
; v8 += v72 * v70 * v73
v_mac_f32_e32 v8, v72, v81
; Rational polynomial in z = v9*v45, evaluated with Horner steps in v81 = z^2
; (even and odd powers are kept in separate chains and joined with v_mac by z):
;   v84 = denominator (constant term 1.0), v81 = numerator.
v_mul_f32_e32 v72, v9, v45
v_mul_f32_e32 v81, v72, v72
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v72, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v72, v86
v_rcp_f32_e32 v72, v84
; v45 = s18 * v45 * v79, i.e. s18 times the distance (v79 is the reciprocal square root).
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 0x3fa00000 = 1.25
v_mov_b32_e32 v84, 0x3fa00000
; v72 = v37 * numerator / denominator + v79 * v70 * v80
v_mul_f32_e32 v72, v37, v72
v_mul_f32_e32 v72, v81, v72
v_mul_f32_e32 v81, v70, v80
v_mac_f32_e32 v72, v79, v81
; Piecewise rational function of x = v45 follows.
; NOTE(review): thresholds 2^-28, 0.84375, 1.25, ~2.857, 6.0 and the 0x3f58560b / 0x3f8375d4
; constants match the layout of an fdlibm-style single-precision erf - confirm.
; s50, s51, s52 are loaded outside this view; from their use they look like the |x| mask,
; a low-bit-clearing mask and the sign-bit mask - confirm.
; v81 = v45 & s50; s[4:5] = (v81 < 1.25)
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; v88 = 0x6f800000 = 2^96, used by the scaled divisions below.
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
; 0x3f580000 = 0.84375; s[8:9] = (v81 < 0.84375)
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
; Polynomial argument v84: v81^2 if s[8:9], else v81 - 1 if s[4:5], else 1/v81^2.
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
; 0x4036db6e ~= 2.857; vcc = (v81 < 2.857) selects between the two large-argument coefficient sets.
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
; Two candidate numerator polynomials in v84 (chains in v85 and v86), interleaved.
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
; v85 = selected numerator for the two large-argument ranges.
v_cndmask_b32_e32 v85, v85, v86, vcc
; Two candidate denominator polynomials (chains in v86 and v87), interleaved.
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
; v86 = selected denominator for the two large-argument ranges.
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator used when s[4:5] (v81 < 1.25).
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Denominator used when s[4:5].
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator used when s[8:9] (v81 < 0.84375).
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Final denominator: v86 = 1.0 + v84 * v86.
v_mad_f32 v86, v84, v86, 1.0
; First exponential argument: v91 = v45 & s51, v92 = -v91*v91 - 0.5625 (0xbf100000).
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
; Scaled division: if |denominator| > 2^96 it is pre-multiplied by 2^-32 (0x2f800000)
; before v_rcp, and the quotient is multiplied by the same scale (v90) afterwards.
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; exp(v92): n = trunc(v92 * 1.442695 (0x3fb8aa3b) -/+ 0.5), i.e. round to nearest.
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
; Numerator used when s[8:9] (chain in v87, final step written to v84).
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; 0xbf317180 ~= -0.6931458 and 0xb717f7d1 ~= -9.058e-6: a two-part -ln(2) used to reduce
; the argument: v86 = v92 - n*ln2_hi, v96 = v86 - n*ln2_lo.
v_mov_b32_e32 v85, 0xbf317180
; v84 = selected numerator * rcp(scaled denominator)
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
; Polynomial in v97 = v96^2, then c = v96 - v97*P(v97).
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
; v96 = (c * r) / (2 - c), using the same scaled-division pattern.
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
; v86 = 1 - ((n*ln2_lo - v96) - (v92 - n*ln2_hi)); then n << 23 is added to the bit
; pattern with an integer add, which scales the float by 2^n.
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling: result is 0 unless v92 >= 0xc2aeac4f (~-87.34), +inf (0x7f800000)
; unless v92 < 0x42b17218 (~88.72), and v92 itself when v92 is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponential argument: v91 = (v81 + v91) * (v91 - v81) + v97,
; where v97 = v84 * v90 is the rational quotient with its scale restored.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = v90 * v84 + 0x3f58560b (~0.845063): candidate result for the s[4:5] range.
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
; Same polynomial as above; this time the coefficient registers v98, v101..v103 are
; consumed in place by v_mac.
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = product of the two exponentials; v81 = 1 - v85 / v81 (scaled division by v81).
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; 0x40c00000 = 6.0: at or above it the result is 1.0. 0x31800000 = 2^-28: s[10:11] marks tiny arguments.
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
; OR in the bits of v45 selected by s52 (presumably the sign bit - confirm).
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
; s[8:9] range: v45 + v97*v45.
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
; s[10:11] range: 0.125 * (0x3f8375d4 * v45 + 8.0 * v45).
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
; NaN input is passed through unchanged.
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; v45 = v79 * (v70 - f(x)) - s19 * v70, accumulated into v5 scaled by v51.
v_subrev_f32_e32 v45, v45, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v45, v79, v45, -v70
; v70 = v82 * v73 * v80 (the v45 < s27 flag times v70*v80^4).
v_mul_f32_e32 v70, v73, v80
v_mul_f32_e32 v70, v82, v70
v_mac_f32_e32 v5, v45, v51
; v45 = v51*v72 - v83*v70 and v51 = v70*v83 - v72*v51, i.e. the same scalar with opposite sign.
v_mul_f32_e32 v45, v83, v70
v_mad_f32 v45, v51, v72, -v45
v_mul_f32_e32 v51, v72, v51
v_mad_f32 v51, v70, v83, -v51
; (v69, v71, v74) += (v27, v31, v35) * v51
v_mad_f32 v74, v35, v51, v74
v_mad_f32 v71, v31, v51, v71
v_mac_f32_e32 v69, v27, v51
; (v16, v17, v18) += (v27, v31, v35) * v45
v_mad_f32 v18, v35, v45, v18
v_mad_f32 v17, v31, v45, v17
v_mac_f32_e32 v16, v27, v45
BB7_42: ; %Flow1247
; in Loop: Header=BB7_11 Depth=1
; Join point for the distance test in BB7_40: OR the lanes held in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB7_43: ; in Loop: Header=BB7_11 Depth=1
; Join point for the bit-6 guard: OR the lanes held in s[12:13] back into exec.
s_or_b64 exec, exec, s[12:13]
; Per-lane test of bit 7 of v65: v27 = (v65 >> 7) & 1, vcc = (v27 == 1).
v_lshrrev_b32_e32 v27, 7, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc; s[12:13] = lanes switched off here (OR-ed back into exec at BB7_47).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_47
; Skip the guarded region when no lane is left active.
s_cbranch_execz BB7_47
BB7_44: ; in Loop: Header=BB7_11 Depth=1
; Same shape as BB7_40: loads four dwords from LDS, forms a 3-component difference
; against v75..v77, and keeps only lanes whose squared distance is below a masked threshold.
; m0 is set to -1 ahead of the DS access (presumably the LDS bound/clamp - confirm for the target).
s_mov_b32 m0, -1
; v[79:82] = 16 contiguous bytes of LDS at v55 + 112*8 (offsets are in 8-byte units).
ds_read2_b64 v[79:82], v55 offset0:112 offset1:113
; s[4:5] = ~(exec & s[0:1]) | (v38 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v38, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v27 = v80 - v76 (v_subrev computes src1 - src0)
v_subrev_f32_e32 v27, v76, v80
; s[4:5] |= (exec & s[2:3])
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v23 = v79 - v75; v23 is reused here after its last use in the comparison above.
v_subrev_f32_e32 v23, v75, v79
v_mul_f32_e32 v35, v27, v27
; v45 = 1.0 where the combined mask s[4:5] is set, else 0
v_cndmask_b32_e64 v45, 0, 1.0, s[4:5]
; v31 = v81 - v77
v_subrev_f32_e32 v31, v77, v81
; v35 = v27^2 + v23^2 + v31^2 (squared length of the difference vector)
v_mac_f32_e32 v35, v23, v23
v_mac_f32_e32 v35, v31, v31
; Threshold is s26 for masked-in lanes and 0 otherwise, so masked-out lanes never pass.
v_mul_f32_e32 v45, s26, v45
v_cmp_lt_f32_e32 vcc, v35, v45
; exec &= vcc; s[30:31] = lanes switched off here (OR-ed back into exec at BB7_46).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_46
s_cbranch_execz BB7_46
BB7_45: ; in Loop: Header=BB7_11 Depth=1
; Pair arithmetic for the lanes that passed the distance test in BB7_44. The computation
; has the same structure as BB7_41 with a different register assignment and a more
; interleaved schedule.
; Inputs used here: v35 = squared distance, v23/v27/v31 = difference components,
; v82 and v78 (multiplied into v45), v67/v68, v9, v37, s18, s19, s27, s42/s43, s50..s52.
; Outputs: v5 and v8 are accumulated; v69/v71/v74 and v13/v14/v15 receive +/- the same
; scalar times the difference vector. v75..v78 are overwritten as temporaries in this block.
; NOTE(review): the structure looks like a Lennard-Jones + Ewald electrostatics pair
; kernel with an fdlibm-style erf - confirm against the OpenCL source.
;
; v70 = 1.0 if bit 7 of v19 is set, else 0. v35 is clamped from below to 0x34cd15ae
; (~3.82e-7) and v51 = 1/sqrt(v35).
v_lshrrev_b32_e32 v51, 7, v19
v_and_b32_e32 v51, 1, v51
v_max_f32_e32 v35, 0x34cd15ae, v35
v_cmp_eq_u32_e32 vcc, 1, v51
v_rsq_f32_e32 v51, v35
v_cndmask_b32_e64 v70, 0, 1.0, vcc
s_mov_b32 m0, -1
; vcc = (v35 < s27); consumed by the v_cndmask into v68 further down.
v_cmp_gt_f32_e32 vcc, s27, v35
; v75 = v51^2, v76 = v70 * v75^2.
v_mul_f32_e32 v75, v51, v51
v_mul_f32_e32 v72, v75, v75
v_mul_f32_e32 v76, v70, v72
; v[72:73] = 8 bytes of LDS at v56 + 448.
ds_read_b64 v[72:73], v56 offset:448
; v45 = v78 * v82, taken before v78 and v82 are reused as temporaries.
v_mul_f32_e32 v45, v78, v82
v_mov_b32_e32 v78, 0x3c739487
v_mov_b32_e32 v79, 0x35c55945
; 0x3fa00000 = 1.25
v_mov_b32_e32 v80, 0x3fa00000
s_waitcnt lgkmcnt(0)
; v68 = v68*v73 and (below) v67 = v67*v72: the LDS pair scaled by v68/v67.
; v73 = v70 * v75^3; v77 = v72*v67 - v73*v68 is kept for the final combination.
v_mul_f32_e32 v68, v68, v73
v_mul_f32_e32 v73, v75, v76
v_mul_f32_e32 v77, v73, v68
v_mad_f32 v77, v72, v67, -v77
v_mul_f32_e32 v67, v67, v72
v_mad_f32 v72, v73, v73, s43
v_mul_f32_e32 v68, v72, v68
v_mad_f32 v72, v76, v75, s42
; 0xbe2aaaab = -1/6, 0x3daaaaaa ~= 1/12.
v_mul_f32_e32 v72, 0xbe2aaaab, v72
v_mul_f32_e32 v67, v67, v72
v_mac_f32_e32 v67, 0x3daaaaaa, v68
; v68 = 1.0 where v35 < s27, else 0.
v_cndmask_b32_e64 v68, 0, 1.0, vcc
v_mul_f32_e32 v72, v70, v68
; v8 += v67 * v70 * v68
v_mac_f32_e32 v8, v67, v72
; Rational polynomial in z = v9*v35, Horner steps in v72 = z^2:
; v76 = denominator (constant term 1.0), v72 = numerator.
v_mul_f32_e32 v67, v9, v35
v_mul_f32_e32 v72, v67, v67
v_mov_b32_e32 v76, 0x3a92b707
v_madak_f32_e32 v76, v76, v72, 0x3ded3cb2
v_madak_f32_e32 v78, v78, v72, 0x3f01e2bc
v_mad_f32 v76, v76, v72, 1.0
v_mac_f32_e32 v76, v67, v78
v_mov_b32_e32 v78, 0xb2951928
v_madak_f32_e32 v78, v78, v72, 0xb85ffb93
v_madak_f32_e32 v79, v79, v72, 0x3a83ca0c
v_madak_f32_e32 v78, v78, v72, 0xbc9ded90
v_madak_f32_e32 v79, v79, v72, 0x3d8eaf3b
v_madak_f32_e32 v72, v78, v72, 0xbf409397
v_mac_f32_e32 v72, v67, v79
v_rcp_f32_e32 v67, v76
; v35 = s18 * v35 * v51, i.e. s18 times the distance.
v_mul_f32_e32 v35, s18, v35
v_mul_f32_e32 v35, v51, v35
v_mov_b32_e32 v81, 0xbd777f97
; v67 = v37 * numerator / denominator + v51 * v70 * v75
v_mul_f32_e32 v67, v37, v67
v_mul_f32_e32 v67, v72, v67
v_mul_f32_e32 v72, v70, v75
v_mac_f32_e32 v67, v51, v72
; Piecewise rational function of x = v35 follows.
; s50, s51, s52 are loaded outside this view; from their use they look like the |x| mask,
; a low-bit-clearing mask and the sign-bit mask - confirm.
; v72 = v35 & s50; s[4:5] = (v72 < 1.25); s[8:9] = (v72 < 0.84375).
v_and_b32_e32 v72, s50, v35
v_mul_f32_e32 v76, v72, v72
v_rcp_f32_e32 v78, v76
v_cmp_gt_f32_e64 s[4:5], v80, v72
v_add_f32_e32 v79, -1.0, v72
v_mov_b32_e32 v80, 0xc11d077e
v_cndmask_b32_e64 v78, v78, v79, s[4:5]
v_mov_b32_e32 v79, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v79, v72
; Polynomial argument v76: v72^2 if s[8:9], else v72 - 1 if s[4:5], else 1/v72^2.
v_cndmask_b32_e64 v76, v78, v76, s[8:9]
; Four interleaved Horner chains in v76: two candidate numerators (v78, v80) and two
; candidate denominators (v79, v81) for the large-argument ranges.
v_mov_b32_e32 v78, 0xc3f1c275
v_madak_f32_e32 v80, v80, v76, 0xc2a2932b
v_madak_f32_e32 v78, v78, v76, 0xc480230b
v_mov_b32_e32 v79, 0xc1b38712
v_madak_f32_e32 v80, v76, v80, 0xc3389ae7
v_madak_f32_e32 v81, v81, v76, 0x40d23f7c
v_madak_f32_e32 v78, v76, v78, 0xc41f6441
v_madak_f32_e32 v79, v79, v76, 0x43ed43a7
v_madak_f32_e32 v80, v76, v80, 0xc322658c
v_madak_f32_e32 v81, v76, v81, 0x42d9451f
v_madak_f32_e32 v78, v76, v78, 0xc320a2ea
v_madak_f32_e32 v79, v76, v79, 0x451f90ce
v_madak_f32_e32 v80, v76, v80, 0xc2798057
v_madak_f32_e32 v81, v76, v81, 0x43d6810b
v_madak_f32_e32 v78, v76, v78, 0xc18e104b
v_madak_f32_e32 v79, v76, v79, 0x4547fdbb
v_madak_f32_e32 v80, v76, v80, 0xc128f022
v_madak_f32_e32 v81, v76, v81, 0x442158c9
v_madak_f32_e32 v78, v76, v78, 0xbf4c9dd4
v_madak_f32_e32 v79, v76, v79, 0x44c01759
v_madak_f32_e32 v80, v76, v80, 0xbf31a0b7
v_madak_f32_e32 v81, v76, v81, 0x43d9486f
; 0x4036db6e ~= 2.857; vcc = (v72 < 2.857) picks between the two large-argument sets.
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v79, v76, v79, 0x43a2e571
v_madak_f32_e32 v81, v76, v81, 0x4309a863
v_madak_f32_e32 v78, v76, v78, 0xbc21a092
v_madak_f32_e32 v80, v76, v80, 0xbc21a093
v_cmp_gt_f32_e32 vcc, v82, v72
v_cndmask_b32_e32 v78, v78, v80, vcc
; Numerator (v80) and denominator (v81) chains for the s[4:5] range, interleaved.
v_mov_b32_e32 v80, 0xbb0df9c0
v_madak_f32_e32 v79, v76, v79, 0x41f2b459
v_madak_f32_e32 v81, v76, v81, 0x419d35ce
v_madak_f32_e32 v80, v80, v76, 0x3d1151b3
v_cndmask_b32_e32 v79, v79, v81, vcc
v_mov_b32_e32 v81, 0x3c445aa3
v_madak_f32_e32 v80, v76, v80, 0xbde31cc2
v_madak_f32_e32 v81, v81, v76, 0x3c5f6e13
v_madak_f32_e32 v80, v76, v80, 0x3ea2fe54
v_madak_f32_e32 v81, v76, v81, 0x3e013307
v_madak_f32_e32 v80, v76, v80, 0xbebe9208
v_madak_f32_e32 v81, v76, v81, 0x3d931ae7
v_madak_f32_e32 v80, v76, v80, 0x3ed46805
v_madak_f32_e32 v81, v76, v81, 0x3f0a5785
v_madak_f32_e32 v80, v76, v80, 0xbb1acdc6
v_madak_f32_e32 v81, v76, v81, 0x3dd9f331
v_cndmask_b32_e64 v78, v78, v80, s[4:5]
; Numerator (v80) and denominator (v81) chains for the s[8:9] range, interleaved.
v_mov_b32_e32 v80, 0xb7c756b1
v_cndmask_b32_e64 v79, v79, v81, s[4:5]
v_mov_b32_e32 v81, 0xb684e21a
v_madak_f32_e32 v80, v80, v76, 0xbbbd1489
v_madak_f32_e32 v81, v81, v76, 0x390aee49
v_madak_f32_e32 v80, v76, v80, 0xbce9528f
v_madak_f32_e32 v81, v76, v81, 0x3ba68116
v_madak_f32_e32 v80, v76, v80, 0xbea66beb
v_madak_f32_e32 v81, v76, v81, 0x3d852a63
v_madak_f32_e32 v80, v76, v80, 0x3e0375d4
v_madak_f32_e32 v81, v76, v81, 0x3ecbbbce
; v79 = selected denominator polynomial, v78 = selected numerator polynomial.
v_cndmask_b32_e64 v79, v79, v81, s[8:9]
v_cndmask_b32_e64 v78, v78, v80, s[8:9]
; First exponential argument: v80 = v35 & s51, v81 = -v80*v80 - 0.5625 (0xbf100000).
v_and_b32_e32 v80, s51, v35
v_mov_b32_e32 v81, 0xbf100000
v_mad_f32 v81, v80, -v80, v81
; exp(v81): n = trunc(v81 * 1.442695 (0x3fb8aa3b) -/+ 0.5), i.e. round to nearest.
v_cmp_gt_f32_e64 s[10:11], 0, v81
v_cndmask_b32_e64 v82, 0.5, -0.5, s[10:11]
v_mov_b32_e32 v83, 0x3fb8aa3b
v_mac_f32_e32 v82, v83, v81
v_cvt_i32_f32_e32 v82, v82
; 0xbf317180 ~= -0.6931458 and 0xb717f7d1 ~= -9.058e-6: a two-part -ln(2) for the
; argument reduction. v92, v93, v95, v96, v97 hold the polynomial coefficients.
v_mov_b32_e32 v87, 0xbf317180
v_mov_b32_e32 v89, 0xb717f7d1
v_mov_b32_e32 v92, 0xb5ddea0e
v_cvt_f32_i32_e32 v86, v82
v_mov_b32_e32 v93, 0x3331bb4c
v_mov_b32_e32 v95, 0x388ab355
v_mov_b32_e32 v96, 0xbb360b61
v_mad_f32 v88, v87, v86, v81
v_mad_f32 v90, v89, v86, v88
v_mul_f32_e32 v91, v90, v90
v_mad_f32 v94, v93, v91, v92
v_mad_f32 v94, v94, v91, v95
v_mad_f32 v94, v94, v91, v96
v_mov_b32_e32 v97, 0x3e2aaaab
v_mad_f32 v94, v94, v91, v97
v_mad_f32 v91, -v91, v94, v90
; Final denominator of the rational part: v76 = 1.0 + v76 * v79.
v_mad_f32 v76, v76, v79, 1.0
; Scaled divisions: a divisor with |d| > 2^96 (0x6f800000) is pre-multiplied by 2^-32
; (0x2f800000) before v_rcp and the quotient is multiplied by the same scale afterwards.
v_mov_b32_e32 v79, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v76|, v79
v_mov_b32_e32 v84, 0x2f800000
v_sub_f32_e32 v94, 2.0, v91
v_cndmask_b32_e32 v85, 1.0, v84, vcc
v_cmp_gt_f32_e64 vcc, |v94|, v79
v_cndmask_b32_e32 v98, 1.0, v84, vcc
v_mul_f32_e32 v94, v98, v94
v_rcp_f32_e32 v94, v94
v_mul_f32_e32 v76, v85, v76
v_mul_f32_e32 v90, v91, v90
v_rcp_f32_e32 v76, v76
v_mul_f32_e32 v90, v94, v90
v_mul_f32_e32 v90, v90, v98
v_mad_f32 v86, -v86, v89, -v90
v_subrev_f32_e32 v86, v88, v86
; v76 = numerator * rcp(scaled denominator); v78 = v76 * v85 is the rational quotient.
v_mul_f32_e32 v76, v76, v78
; Second exponential argument: v80 = (v72 + v80) * (v80 - v72) + v78.
v_subrev_f32_e32 v88, v72, v80
v_mul_f32_e32 v78, v76, v85
v_add_f32_e32 v80, v72, v80
v_sub_f32_e32 v86, 1.0, v86
; n << 23 is added to the bit pattern with an integer add, which scales the float by 2^n.
v_lshlrev_b32_e32 v82, 23, v82
v_add_i32_e32 v82, vcc, v86, v82
v_mad_f32 v80, v80, v88, v78
v_cmp_gt_f32_e32 vcc, 0, v80
v_cndmask_b32_e64 v88, 0.5, -0.5, vcc
v_mac_f32_e32 v88, v83, v80
v_cvt_i32_f32_e32 v83, v88
; Range handling for the first exponential: 0 unless v81 >= 0xc2aeac4f (~-87.34),
; +inf (0x7f800000) unless v81 < 0x42b17218 (~88.72), NaN passed through.
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v81, v86
v_mov_b32_e32 v88, 0x42b17218
v_cvt_f32_i32_e32 v90, v83
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v81, v88
v_mov_b32_e32 v91, 0x7f800000
v_mad_f32 v87, v87, v90, v80
v_mad_f32 v94, v89, v90, v87
v_mul_f32_e32 v98, v94, v94
; Same polynomial as above; the coefficient registers v92, v95..v97 are consumed in place.
v_mac_f32_e32 v92, v93, v98
v_mac_f32_e32 v95, v92, v98
v_mac_f32_e32 v96, v95, v98
v_mac_f32_e32 v97, v96, v98
v_mad_f32 v92, -v98, v97, v94
v_sub_f32_e32 v93, 2.0, v92
v_cndmask_b32_e32 v82, v91, v82, vcc
v_cmp_gt_f32_e64 vcc, |v93|, v79
v_cndmask_b32_e32 v95, 1.0, v84, vcc
v_mul_f32_e32 v93, v95, v93
v_rcp_f32_e32 v93, v93
v_cmp_u_f32_e32 vcc, v81, v81
; v81 = first exponential result.
v_cndmask_b32_e32 v81, v82, v81, vcc
v_mul_f32_e32 v82, v92, v94
v_mul_f32_e32 v82, v93, v82
v_mul_f32_e32 v82, v82, v95
v_mad_f32 v82, -v90, v89, -v82
v_subrev_f32_e32 v82, v87, v82
v_sub_f32_e32 v82, 1.0, v82
v_lshlrev_b32_e32 v83, 23, v83
v_add_i32_e32 v82, vcc, v82, v83
v_cmp_ge_f32_e32 vcc, v80, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e64 s[10:11], v80, v88
v_cndmask_b32_e64 v82, v91, v82, s[10:11]
v_cmp_u_f32_e32 vcc, v80, v80
; v80 = second exponential result.
v_cndmask_b32_e32 v80, v82, v80, vcc
; v79 = 1 - (v80 * v81) / v72, with the scaled-division pattern applied to v72.
v_cmp_gt_f32_e64 vcc, |v72|, v79
v_cndmask_b32_e32 v79, 1.0, v84, vcc
v_mul_f32_e32 v80, v80, v81
v_mul_f32_e32 v81, v79, v72
v_rcp_f32_e32 v81, v81
v_mul_f32_e32 v80, v81, v80
v_mad_f32 v79, -v79, v80, 1.0
; 0x40c00000 = 6.0: at or above it the result is 1.0. 0x31800000 = 2^-28: s[10:11] marks tiny arguments.
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e32 vcc, v80, v72
v_mov_b32_e32 v80, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v80, v72
; s[4:5] range candidate: v85 * v76 + 0x3f58560b (~0.845063).
v_madak_f32_e32 v72, v85, v76, 0x3f58560b
v_cndmask_b32_e32 v76, 1.0, v79, vcc
v_cndmask_b32_e64 v72, v76, v72, s[4:5]
; OR in the bits of v35 selected by s52 (presumably the sign bit - confirm).
v_and_b32_e32 v76, s52, v35
v_or_b32_e32 v72, v76, v72
; s[8:9] range: v35 + v78*v35.
v_mad_f32 v76, v78, v35, v35
v_cndmask_b32_e64 v72, v72, v76, s[8:9]
; s[10:11] range: 0.125 * (0x3f8375d4 * v35 + 8.0 * v35).
v_mul_f32_e32 v76, 0x3f8375d4, v35
v_mac_f32_e32 v76, 0x41000000, v35
v_mul_f32_e32 v76, 0x3e000000, v76
v_cndmask_b32_e64 v72, v72, v76, s[10:11]
; NaN input is passed through unchanged.
v_cmp_u_f32_e32 vcc, v35, v35
v_cndmask_b32_e32 v35, v72, v35, vcc
; v35 = v51 * (v70 - f(x)) - s19 * v70, accumulated into v5 scaled by v45.
v_subrev_f32_e32 v35, v35, v70
v_mul_f32_e32 v70, s19, v70
v_mad_f32 v35, v51, v35, -v70
; v51 = v73 * v68 * v75 (the v35 < s27 flag times v70*v75^4).
v_mul_f32_e32 v51, v68, v75
v_mul_f32_e32 v51, v73, v51
v_mac_f32_e32 v5, v35, v45
; v35 = v45*v67 - v77*v51 and v45 = v51*v77 - v67*v45, i.e. the same scalar with opposite sign.
v_mul_f32_e32 v35, v77, v51
v_mad_f32 v35, v45, v67, -v35
v_mul_f32_e32 v45, v67, v45
v_mad_f32 v45, v51, v77, -v45
; (v69, v71, v74) += (v23, v27, v31) * v45
v_mad_f32 v74, v31, v45, v74
v_mad_f32 v71, v27, v45, v71
v_mac_f32_e32 v69, v23, v45
; (v13, v14, v15) += (v23, v27, v31) * v35
v_mad_f32 v15, v31, v35, v15
v_mad_f32 v14, v27, v35, v14
v_mac_f32_e32 v13, v23, v35
BB7_46: ; %Flow1246
; in Loop: Header=BB7_11 Depth=1
; Join point for the distance test in BB7_44: OR the lanes held in s[30:31] back into exec.
s_or_b64 exec, exec, s[30:31]
BB7_47: ; in Loop: Header=BB7_11 Depth=1
; Join point for the bit-7 guard: OR the lanes held in s[12:13] back into exec.
s_or_b64 exec, exec, s[12:13]
s_mov_b32 m0, -1
; vcc = (v2 < 3), signed compare; consumed by s_and_saveexec below.
v_cmp_gt_i32_e32 vcc, 3, v2
; Store the three accumulators v69, v71, v74 to LDS at the addresses in v6, v7, v12.
ds_write_b32 v6, v69
ds_write_b32 v7, v71
ds_write_b32 v12, v74
; exec &= vcc; s[4:5] ends up holding the lanes switched off here (OR-ed back at BB7_53).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; Wait for the LDS writes above to complete before the reads in BB7_48.
s_waitcnt lgkmcnt(0)
; mask branch BB7_53
s_cbranch_execz BB7_53
BB7_48: ; in Loop: Header=BB7_11 Depth=1
; Runs only on lanes with v2 < 3. Computes an LDS byte address and loads the first
; dword of the sum that BB7_49 completes.
; v27 = v2 << 6 (v2 * 64)
v_lshlrev_b32_e32 v27, 6, v2
; v31 = s15 + 4 * (v11 + v27)
v_add_i32_e32 v23, vcc, v11, v27
v_lshlrev_b32_e32 v23, 2, v23
v_add_i32_e32 v31, vcc, s15, v23
s_mov_b32 m0, -1
; v23 = LDS dword at v31
ds_read_b32 v23, v31
; vcc = ((v11 | 1) < v11 + 8), signed compare; guards the remaining reads in BB7_49.
v_add_i32_e32 v35, vcc, 8, v11
v_or_b32_e32 v45, 1, v11
v_cmp_lt_i32_e32 vcc, v45, v35
; exec &= vcc; s[8:9] ends up holding the lanes switched off here (OR-ed back at BB7_50).
s_and_saveexec_b64 s[8:9], vcc
s_xor_b64 s[8:9], exec, s[8:9]
s_waitcnt lgkmcnt(0)
; mask branch BB7_50
s_cbranch_execz BB7_50
BB7_49: ; in Loop: Header=BB7_11 Depth=1
; Adds seven more LDS dwords to v23 (which already holds the dword at v31 from BB7_48),
; so v23 ends up as the sum of eight values.
s_mov_b32 m0, -1
; Dwords at v31+4 and v31+8 (ds_read2_b32 offsets are in 4-byte units).
ds_read2_b32 v[67:68], v31 offset0:1 offset1:2
; Second base address: v27 = s15 + 4 * ((v11 | 3) + (v2 << 6)); v27 still holds v2 << 6 on entry.
; NOTE(review): presumably v11 has its low two bits clear, which would make the two reads
; at this base the dwords at v31+20 and v31+24 - confirm.
v_or_b32_e32 v35, 3, v11
v_add_i32_e32 v27, vcc, v35, v27
v_lshlrev_b32_e32 v27, 2, v27
; Dwords at v31+12 and v31+16.
ds_read2_b32 v[69:70], v31 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v67
v_add_i32_e32 v27, vcc, s15, v27
v_add_f32_e32 v23, v68, v23
; Dwords at v27+8 and v27+12.
ds_read2_b32 v[67:68], v27 offset0:2 offset1:3
; Dword at v31+28; the address register v31 is overwritten by the loaded value.
ds_read_b32 v31, v31 offset:28
v_add_f32_e32 v23, v69, v23
v_add_f32_e32 v23, v70, v23
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v67, v23
v_add_f32_e32 v23, v68, v23
v_add_f32_e32 v23, v31, v23
BB7_50: ; %._crit_edge.i118
; in Loop: Header=BB7_11 Depth=1
; Rejoin the lanes that skipped BB7_49, then set up the compare-and-swap loop in BB7_51:
; compute the target address and load the current value.
s_or_b64 exec, exec, s[8:9]
; Element index = v66 * 3 + v2.
v_mul_lo_i32 v27, v66, 3
v_mov_b32_e32 v31, s29
; Complete the buffer resource s[28:31] by copying s[46:47] into its upper half.
s_mov_b64 s[30:31], s[46:47]
; s[8:9] = 0: starts empty and collects the lanes that have finished the loop in BB7_51.
s_mov_b64 s[8:9], 0
v_add_i32_e32 v66, vcc, v27, v2
; Sign-extend the index to 64 bits and scale by 4 bytes: v[68:69] = byte offset.
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 2
; v[66:67] = s[28:29] + byte offset (64-bit add with carry); used as the atomic's address in BB7_51.
v_add_i32_e32 v66, vcc, s28, v68
v_addc_u32_e32 v67, vcc, v69, v31, vcc
; v69 = current dword at that location (addressed as resource s[28:31] + offset v[68:69]).
buffer_load_dword v69, v[68:69], s[28:31], 0 addr64
s_waitcnt vmcnt(0)
BB7_51: ; Parent Loop BB7_11 Depth=1
; => This Inner Loop Header: Depth=2
; Compare-and-swap retry loop that adds v23 to the float at address v[66:67].
; v69 holds the value last observed in memory. Each active lane tries to replace it
; with v69 + v23 and repeats until the swap succeeds.
; v68 = v23 + v69 (proposed new value)
v_add_f32_e32 v68, v23, v69
; Operand pair for the atomic: v70 = new value, v71 = expected old value.
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v70, v68
; With glc set, the value found in memory before the operation is returned in v70.
buffer_atomic_cmpswap v[70:71], v[66:67], s[44:47], 0 addr64 glc
; NOTE(review): the first write to v27 (-1) is overwritten by the next instruction with no
; read in between, and v27 is not read anywhere in this loop. Looks like leftover compiler
; output - confirm whether 0xf000 in v27 is needed later in the kernel.
v_mov_b32_e32 v27, -1
v_mov_b32_e32 v27, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; Swap succeeded where the returned value equals the expected one.
v_cmp_eq_u32_e32 vcc, v70, v69
; Accumulate finished lanes in s[8:9]; the returned value becomes the new expected value.
s_or_b64 s[8:9], vcc, s[8:9]
v_mov_b32_e32 v69, v70
; Drop finished lanes from exec and loop while any lane remains.
s_andn2_b64 exec, exec, s[8:9]
s_cbranch_execnz BB7_51
; BB#52: ; %Flow1244
; in Loop: Header=BB7_11 Depth=1
; Loop exit: re-enable all lanes that took part in the loop.
s_or_b64 exec, exec, s[8:9]
BB7_53: ; %Flow1245
; in Loop: Header=BB7_11 Depth=1
; Join point for the v2 < 3 guard set up in BB7_47: OR the lanes held in s[4:5] back into exec.
s_or_b64 exec, exec, s[4:5]
BB7_54: ; %Flow1254
; in Loop: Header=BB7_11 Depth=1
; Join point: OR the lane mask held in s[56:57] back into exec.
s_or_b64 exec, exec, s[56:57]
; Per-lane test of whether any of bits 8..15 of v65 is set (mask 0xff00).
v_and_b32_e32 v23, 0xff00, v65
v_cmp_ne_u32_e32 vcc, 0, v23
; exec &= vcc; s[30:31] = lanes switched off here.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_94
; Skip to BB7_94 when no lane has any of those bits set.
s_cbranch_execz BB7_94
BB7_55: ; in Loop: Header=BB7_11 Depth=1
; Runs on lanes with any of bits 8..15 of v65 set. Loads an index from LDS, fetches a
; 16-byte and an 8-byte record for element (index * 8 + v1) from two buffers, clears three
; accumulators, and guards the next region on bit 8 of v65.
s_mov_b32 m0, -1
; v23 = LDS dword at v54 + 4.
ds_read_b32 v23, v54 offset:4
; Buffer resource s[8:11]: base from s[32:33], upper half from s[46:47].
s_mov_b64 s[8:9], s[32:33]
s_mov_b64 s[10:11], s[46:47]
s_waitcnt lgkmcnt(0)
; v66 = v23 * 8 + v1, sign-extended to 64 bits in v[66:67].
v_lshlrev_b32_e32 v27, 3, v23
v_add_i32_e32 v66, vcc, v27, v1
v_ashrrev_i32_e32 v67, 31, v66
; v[70:73] = 16 bytes at byte offset v66 * 16 in the first buffer.
v_lshl_b64 v[68:69], v[66:67], 4
buffer_load_dwordx4 v[70:73], v[68:69], s[8:11], 0 addr64
; v[67:68] = 8 bytes at byte offset v66 * 8 in the buffer based at s[36:37].
v_lshl_b64 v[74:75], v[66:67], 3
s_mov_b64 s[8:9], s[36:37]
buffer_load_dwordx2 v[67:68], v[74:75], s[8:11], 0 addr64
; Per-lane test of bit 8 of v65.
v_lshrrev_b32_e32 v27, 8, v65
; v69 = v75 = v78 = 0.
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
v_mov_b32_e32 v75, v69
v_mov_b32_e32 v78, v69
; exec &= vcc; s[34:35] = lanes switched off here.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; Both buffer loads must have landed before any successor block uses v[70:73] / v[67:68].
s_waitcnt vmcnt(0)
; mask branch BB7_59
s_cbranch_execz BB7_59
BB7_56: ; in Loop: Header=BB7_11 Depth=1
; Same shape as the earlier distance tests, now against v70..v72 loaded in BB7_55:
; loads four dwords from LDS, forms a 3-component difference, and keeps only lanes
; whose squared distance is below a masked threshold.
s_mov_b32 m0, -1
; v[79:82] = 16 contiguous bytes of LDS at v55 (8-byte slots 0 and 1).
ds_read2_b64 v[79:82], v55 offset1:1
; s[4:5] = ~(exec & s[0:1]) | (v41 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v27 = v80 - v71 (v_subrev computes src1 - src0)
v_subrev_f32_e32 v27, v71, v80
; s[4:5] |= (exec & s[2:3])
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v31 = v79 - v70
v_subrev_f32_e32 v31, v70, v79
v_mul_f32_e32 v45, v27, v27
; v51 = 1.0 where the combined mask s[4:5] is set, else 0
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v31, v31
; v69 = v75 = v78 = 0 (v75 and v78 are set just before the exec update below).
v_mov_b32_e32 v69, 0
; v45 = v27^2 + v31^2 + v35^2 (squared length of the difference vector)
v_mac_f32_e32 v45, v35, v35
; Threshold is s26 for masked-in lanes and 0 otherwise, so masked-out lanes never pass.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
v_mov_b32_e32 v75, v69
v_mov_b32_e32 v78, v69
; exec &= vcc; s[38:39] = lanes switched off here.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB7_58
s_cbranch_execz BB7_58
BB7_57: ; in Loop: Header=BB7_11 Depth=1
v_lshrrev_b32_e32 v69, 8, v19
v_and_b32_e32 v69, 1, v69
v_max_f32_e32 v45, 0x34cd15ae, v45
v_cmp_eq_u32_e32 vcc, 1, v69
v_rsq_f32_e32 v69, v45
v_cndmask_b32_e64 v76, 0, 1.0, vcc
s_mov_b32 m0, -1
v_cmp_gt_f32_e32 vcc, s27, v45
v_mul_f32_e32 v77, v69, v69
v_mul_f32_e32 v74, v77, v77
v_mul_f32_e32 v78, v76, v74
ds_read_b64 v[74:75], v56
v_mul_f32_e32 v79, v77, v78
v_mad_f32 v78, v78, v77, s42
v_mad_f32 v81, v79, v79, s43
v_mul_f32_e32 v78, 0xbe2aaaab, v78
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v68, v75
v_mul_f32_e32 v80, v79, v75
v_mad_f32 v80, v74, v67, -v80
v_mul_f32_e32 v74, v67, v74
v_mul_f32_e32 v75, v81, v75
v_mul_f32_e32 v74, v74, v78
v_mac_f32_e32 v74, 0x3daaaaaa, v75
v_cndmask_b32_e64 v75, 0, 1.0, vcc
v_mul_f32_e32 v78, v76, v75
v_mac_f32_e32 v8, v74, v78
v_mul_f32_e32 v74, v9, v45
v_mul_f32_e32 v78, v74, v74
v_mov_b32_e32 v81, 0x3a92b707
v_madak_f32_e32 v81, v81, v78, 0x3ded3cb2
v_mul_f32_e32 v51, v73, v82
v_mov_b32_e32 v82, 0x3c739487
v_madak_f32_e32 v82, v82, v78, 0x3f01e2bc
v_mad_f32 v81, v81, v78, 1.0
v_mac_f32_e32 v81, v74, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v78, 0xb85ffb93
v_mov_b32_e32 v83, 0x35c55945
v_mul_f32_e32 v45, s18, v45
v_madak_f32_e32 v83, v83, v78, 0x3a83ca0c
v_mul_f32_e32 v45, v69, v45
v_madak_f32_e32 v82, v82, v78, 0xbc9ded90
v_madak_f32_e32 v83, v83, v78, 0x3d8eaf3b
v_madak_f32_e32 v78, v82, v78, 0xbf409397
v_and_b32_e32 v82, s50, v45
v_mov_b32_e32 v84, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v84, v82
v_mul_f32_e32 v84, v82, v82
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v82
v_mov_b32_e32 v87, 0xbd777f97
v_and_b32_e32 v89, s51, v45
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v82
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v82
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb7c756b1
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_madak_f32_e32 v87, v84, v87, 0x3e0375d4
v_cndmask_b32_e64 v85, v85, v87, s[8:9]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
v_mad_f32 v84, v84, v86, 1.0
v_mov_b32_e32 v86, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v84|, v86
v_mov_b32_e32 v87, 0x2f800000
v_cndmask_b32_e32 v88, 1.0, v87, vcc
v_mul_f32_e32 v84, v88, v84
v_mov_b32_e32 v90, 0xbf100000
v_rcp_f32_e32 v84, v84
v_mad_f32 v90, v89, -v89, v90
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mov_b32_e32 v92, 0x3fb8aa3b
v_mac_f32_e32 v91, v92, v90
v_mul_f32_e32 v84, v84, v85
v_cvt_i32_f32_e32 v91, v91
v_subrev_f32_e32 v105, v82, v89
v_mul_f32_e32 v93, v84, v88
v_add_f32_e32 v89, v82, v89
v_mad_f32 v89, v89, v105, v93
v_cmp_gt_f32_e64 s[10:11], 0, v89
v_cndmask_b32_e64 v105, 0.5, -0.5, s[10:11]
v_cvt_f32_i32_e32 v85, v91
v_mac_f32_e32 v105, v92, v89
v_cvt_i32_f32_e32 v92, v105
v_mov_b32_e32 v94, 0xbf317180
v_mad_f32 v95, v94, v85, v90
v_mov_b32_e32 v96, 0xb717f7d1
v_mad_f32 v97, v96, v85, v95
v_mul_f32_e32 v98, v97, v97
v_mov_b32_e32 v99, 0xb5ddea0e
v_mov_b32_e32 v100, 0x3331bb4c
v_cvt_f32_i32_e32 v106, v92
v_mad_f32 v101, v100, v98, v99
v_mov_b32_e32 v102, 0x388ab355
v_mad_f32 v101, v101, v98, v102
v_mov_b32_e32 v103, 0xbb360b61
v_mad_f32 v101, v101, v98, v103
v_mov_b32_e32 v104, 0x3e2aaaab
v_mad_f32 v101, v101, v98, v104
v_mac_f32_e32 v78, v74, v83
v_mad_f32 v83, v94, v106, v89
v_mad_f32 v98, -v98, v101, v97
v_mad_f32 v94, v96, v106, v83
v_mul_f32_e32 v74, v98, v97
v_mul_f32_e32 v97, v94, v94
v_mac_f32_e32 v99, v100, v97
v_sub_f32_e32 v101, 2.0, v98
v_mac_f32_e32 v102, v99, v97
v_cmp_gt_f32_e64 vcc, |v101|, v86
v_mac_f32_e32 v103, v102, v97
v_mac_f32_e32 v104, v103, v97
v_cndmask_b32_e32 v105, 1.0, v87, vcc
v_mul_f32_e32 v101, v105, v101
v_mad_f32 v97, -v97, v104, v94
v_rcp_f32_e32 v101, v101
v_sub_f32_e32 v98, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v98|, v86
v_rcp_f32_e32 v81, v81
v_cndmask_b32_e32 v99, 1.0, v87, vcc
v_mul_f32_e32 v98, v99, v98
v_mul_f32_e32 v74, v101, v74
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v74, v74, v105
v_mad_f32 v74, -v85, v96, -v74
v_mul_f32_e32 v81, v37, v81
v_mul_f32_e32 v85, v97, v94
v_subrev_f32_e32 v74, v95, v74
v_mul_f32_e32 v78, v78, v81
v_mul_f32_e32 v81, v76, v77
v_mul_f32_e32 v85, v98, v85
v_mac_f32_e32 v78, v69, v81
v_lshlrev_b32_e32 v81, 23, v91
v_sub_f32_e32 v74, 1.0, v74
v_mul_f32_e32 v85, v85, v99
v_add_i32_e32 v74, vcc, v74, v81
v_mov_b32_e32 v81, 0xc2aeac4f
v_mad_f32 v85, -v106, v96, -v85
v_cmp_ge_f32_e32 vcc, v90, v81
v_mov_b32_e32 v91, 0x42b17218
v_subrev_f32_e32 v83, v83, v85
v_cndmask_b32_e32 v74, 0, v74, vcc
v_cmp_lt_f32_e32 vcc, v90, v91
v_mov_b32_e32 v94, 0x7f800000
v_cndmask_b32_e32 v74, v94, v74, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_sub_f32_e32 v83, 1.0, v83
v_lshlrev_b32_e32 v85, 23, v92
v_cndmask_b32_e32 v74, v74, v90, vcc
v_add_i32_e32 v83, vcc, v83, v85
v_cmp_ge_f32_e32 vcc, v89, v81
v_cndmask_b32_e32 v81, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v89, v91
v_cndmask_b32_e32 v81, v94, v81, vcc
v_cmp_u_f32_e32 vcc, v89, v89
v_cndmask_b32_e32 v81, v81, v89, vcc
v_mul_f32_e32 v74, v81, v74
v_mov_b32_e32 v81, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v81, v82
v_mov_b32_e32 v81, 0x31800000
v_cmp_gt_f32_e64 vcc, |v82|, v86
v_cmp_gt_f32_e64 s[12:13], v81, v82
v_cndmask_b32_e32 v81, 1.0, v87, vcc
v_mul_f32_e32 v82, v81, v82
v_rcp_f32_e32 v82, v82
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v74, v82, v74
v_mad_f32 v74, -v81, v74, 1.0
v_madak_f32_e32 v81, v88, v84, 0x3f58560b
v_cndmask_b32_e64 v74, 1.0, v74, s[10:11]
v_cndmask_b32_e64 v74, v74, v81, s[4:5]
v_and_b32_e32 v81, s52, v45
v_or_b32_e32 v74, v81, v74
v_mad_f32 v81, v93, v45, v45
v_cndmask_b32_e64 v74, v74, v81, s[8:9]
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v74, v74, v81, s[12:13]
v_cndmask_b32_e32 v45, v74, v45, vcc
v_subrev_f32_e32 v45, v45, v76
v_mul_f32_e32 v74, s19, v76
v_mad_f32 v45, v69, v45, -v74
v_mul_f32_e32 v69, v75, v77
v_mul_f32_e32 v69, v79, v69
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v80, v69
v_mad_f32 v45, v51, v78, -v45
v_mul_f32_e32 v51, v78, v51
v_mad_f32 v51, v69, v80, -v51
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
v_mul_f32_e64 v78, v45, -v35
v_mul_f32_e64 v75, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB7_58: ; %Flow1242
; in Loop: Header=BB7_11 Depth=1
; Join point: OR the lane mask held in s[38:39] back into exec.
; s[38:39] is written before the visible code; presumably it holds lanes that
; were switched off by an earlier s_and_saveexec/s_xor pair -- TODO confirm.
s_or_b64 exec, exec, s[38:39]
BB7_59: ; in Loop: Header=BB7_11 Depth=1
; Join point, then a per-lane test of bit 9 of v65. Lanes whose bit is clear
; are switched off and remembered in s[12:13]; if no lane is left the work
; below is skipped by branching to BB7_63, which ORs s[12:13] back into exec.
; s[34:35] is written before the visible code (presumably lanes parked by an
; earlier saveexec/xor pair -- TODO confirm).
s_or_b64 exec, exec, s[34:35]
; v27 = (v65 >> 9) & 1
v_lshrrev_b32_e32 v27, 9, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = exec before the test; exec &= vcc
s_and_saveexec_b64 s[4:5], vcc
; s[12:13] = new exec XOR old exec = the lanes that were just disabled
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_63
s_cbranch_execz BB7_63
BB7_60: ; in Loop: Header=BB7_11 Depth=1
; Loads four floats from LDS, forms the difference vector (v27, v31, v35)
; against (v70, v71, v72) and its squared length in v45, then keeps only the
; lanes where v45 < s26 * w. w is 1.0 or 0.0 from the lane mask built in
; s[4:5]. Lanes failing the test are parked in s[34:35]; if none pass, branch
; to BB7_62.
; NOTE(review): presumably a pair-distance cutoff test combined with a
; pair-exclusion mask -- confirm against the kernel source.
s_mov_b32 m0, -1
; v[79:80] <- LDS[v55 + 16*8], v[81:82] <- LDS[v55 + 17*8] (offsets are in 8-byte units)
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
; s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v64 != v23), built over the next scalar ops
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; wait for the LDS read before touching v79..v82
s_waitcnt lgkmcnt(0)
; v31 = v80 - v71 (v_subrev computes src1 - src0)
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = w = 1.0 where the s[4:5] bit is set, else 0
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
; keep lanes with v45 < s26 * w; park the others in s[34:35]
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_62
s_cbranch_execz BB7_62
BB7_61: ; in Loop: Header=BB7_11 Depth=1
; Straight-line body for the lanes that passed the v45 < s26*w test in the
; preceding block. Inputs from that block: v45 (squared length), (v27, v31,
; v35) (difference vector), v82 (fourth float read from LDS). Other inputs
; (v9, v19, v37, v67, v68, v73, s18, s19, s27, s42, s43, s50-s52) are set
; outside the visible code. Effects: v8 and v5 each accumulate one scalar, and
; a common scalar is added with opposite signs to (v69, v75, v78) and to
; (v42, v43, v44), scaled by the difference vector.
; NOTE(review): this looks like one pair interaction of a nonbonded kernel
; (r^-6 / r^-12 term plus an erf-based electrostatic term) -- confirm against
; the kernel source before relying on that reading.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7) so v_rsq_f32 never sees zero; v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v74 = bit 9 of v19 as a float (1.0 or 0.0); v[76:77] <- 8 bytes of LDS at v56 + 64.
v_lshrrev_b32_e32 v74, 9, v19
ds_read_b64 v[76:77], v56 offset:64
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2, i.e. 1/v45.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82 consumes the LDS value in v82 before v82 is reused for v74 * v80^3.
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
; v76/v77 from LDS are needed from here on.
s_waitcnt lgkmcnt(0)
; With a = v67 * v76 and b = v68 * v77 (v76/v77 as loaded):
;   v83 = a - b * v82
;   v8 += v74 * [s27 > v45] * ( b*(v82^2 + s43)*0x3daaaaaa(~1/12) + a*(v74*v80^3 + s42)*0xbe2aaaab(~-1/6) )
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1.0 where s27 > v45, else 0 (reused near the end of the block)
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
; Rational function of v76 = v9 * v45, evaluated with v81 = v76^2:
; denominator -> v84, numerator -> v81 (its last step overwrites v76^2).
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
; v76 = 1 / denominator
v_rcp_f32_e32 v76, v84
; x = v45 = s18 * v45 * v79, i.e. s18 times the square root of the clamped v45.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 1.25: first range threshold for the function of x evaluated below
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v37 * numerator / denominator + v74 * v79^3
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; ---- f(x): piecewise rational evaluation of x = v45, result returned in v45 ----
; NOTE(review): the thresholds 0.84375, 1.25, ~1/0.35, 6.0, 2^-28 and the
; constant 0x3f58560b match an fdlibm-style single-precision erf(); presumably
; this is an inlined erf -- confirm.
; v81 = x & s50 (presumably |x|, i.e. s50 = 0x7fffffff -- TODO confirm)
v_and_b32_e32 v81, s50, v45
; s[4:5] = (v81 < 1.25)
v_cmp_gt_f32_e64 s[4:5], v84, v81
; t = v84: v81^2 where v81 < 0.84375 (s[8:9]), else v81 - 1 where v81 < 1.25, else 1/v81^2
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; 0x6f800000 = 2^96: overflow guard for the scaled reciprocals below
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Interleaved Horner chains in t for v81 >= 1.25. vcc = (v81 < 0x4036db6e, ~2.857)
; picks between the two coefficient sets. First pair -> numerator in v85.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Second pair -> denominator tail in v86 (the v87 chain was seeded above).
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator for v81 < 1.25: replaces v85 where s[4:5] is set.
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Matching denominator tail: replaces v86 where s[4:5] is set.
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator tail for v81 < 0.84375: replaces v86 where s[8:9] is set.
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; denominator = 1 + t * tail
v_mad_f32 v86, v84, v86, 1.0
; v91 = x & s51 (s51 is set outside this block; presumably it drops the sign
; and the low mantissa bits -- TODO confirm). v92 = -v91^2 - 0.5625.
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
; Guarded division: v90 = 2^-32 (0x2f800000) if |denominator| > 2^96, else 1.0.
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; n = v93 = (int)(v92 * 0x3fb8aa3b (~log2 e) +/- 0.5), the 0.5 taking the sign of v92
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Numerator for v81 < 0.84375 (chain seeded in v87 above); its last step overwrites t in v84.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
; v84 = selected numerator / (v90 * denominator)
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
; First exponential, E1 -> v86 (the steps follow the usual exp() range reduction):
;   v86 = v92 - n * 0.693145752 (0xbf317180), v96 = v86 - n * ~9.058e-6 (0xb717f7d1)
;   v97 = v96 - v96^2 * P(v96^2), P a 5-coefficient polynomial built in v100
;   quotient v96 * v97 / (2 - v97) through the same 2^96 / 2^-32 guarded reciprocal
;   n << 23 is integer-added to the float bit pattern (scales the result by 2^n)
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range fix-ups on the argument v92: below 0xc2aeac4f (~-87.3) -> 0,
; not below 0x42b17218 (~88.7) -> +inf (0x7f800000), NaN -> v92 itself.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponent argument: v91 = (v91 - v81) * (v91 + v81) + q, q = v97 = v84 * v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
; n = v92 for the second exponential, same rounding rule as above
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = 0x3f58560b (~0.845) + v90 * v84: candidate result where s[4:5] is set
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
; Second exponential, E2 -> v85: same scheme, polynomial accumulated in place in v98..v103.
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = E2 * E1; v87 = 1 / (v86 * v81) with v86 = 2^-32 if v81 > 2^96 else 1.0
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; vcc = (v81 < 6.0); s[10:11] = (v81 < 2^-28, 0x31800000)
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
; Result selection, later selects overriding earlier ones:
;   1 - E2*E1 / v81, forced to 1.0 where v81 >= 6.0
;   v84 where s[4:5]; then (x & s52) is ORed in (presumably the sign of x -- TODO confirm s52)
;   x + x * q where s[8:9]
;   0.125 * (8*x + 0x3f8375d4 (~1.027) * x) where s[10:11]
;   x itself where x is NaN
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; ---- end of f(x) ----
; v45 = v79 * (v74 - f(x)) - s19 * v74; v5 += v45 * v51 (a few lines below)
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v82 * v77 * v80 (v77 = the 1.0/0.0 result of the s27 > v45 test above)
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v76 - v83 * v74; v51 = v74 * v83 - v76 * v51 (same quantity, opposite sign)
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; (v78, v75, v69) += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; (v44, v43, v42) += (v35, v31, v27) * v45
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB7_62: ; %Flow1241
; in Loop: Header=BB7_11 Depth=1
; Join point: re-enable the lanes that BB7_60 parked in s[34:35] when it
; applied its v45 < v51 test.
s_or_b64 exec, exec, s[34:35]
BB7_63: ; in Loop: Header=BB7_11 Depth=1
; Join point: re-enable the lanes that BB7_59 parked in s[12:13], then test
; bit 10 of v65 per lane. Lanes whose bit is clear are switched off and
; remembered in s[12:13] again; if no lane is left, branch to BB7_67, which
; ORs s[12:13] back into exec.
s_or_b64 exec, exec, s[12:13]
; v27 = (v65 >> 10) & 1
v_lshrrev_b32_e32 v27, 10, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = exec before the test; exec &= vcc
s_and_saveexec_b64 s[4:5], vcc
; s[12:13] = new exec XOR old exec = the lanes that were just disabled
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_67
s_cbranch_execz BB7_67
BB7_64: ; in Loop: Header=BB7_11 Depth=1
; Loads four floats from LDS, forms the difference vector (v27, v31, v35)
; against (v70, v71, v72) and its squared length in v45, then keeps only the
; lanes where v45 < s26 * w. w is 1.0 or 0.0 from the lane mask built in
; s[4:5]. Lanes failing the test are parked in s[34:35]; if none pass, branch
; to BB7_66.
; NOTE(review): presumably a pair-distance cutoff test combined with a
; pair-exclusion mask -- confirm against the kernel source.
s_mov_b32 m0, -1
; v[79:80] <- LDS[v55 + 32*8], v[81:82] <- LDS[v55 + 33*8] (offsets are in 8-byte units)
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v63 != v23), built over the next scalar ops
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; wait for the LDS read before touching v79..v82
s_waitcnt lgkmcnt(0)
; v31 = v80 - v71 (v_subrev computes src1 - src0)
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = w = 1.0 where the s[4:5] bit is set, else 0
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
; keep lanes with v45 < s26 * w; park the others in s[34:35]
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_66
s_cbranch_execz BB7_66
BB7_65: ; in Loop: Header=BB7_11 Depth=1
; Straight-line body for the lanes that passed the v45 < s26*w test in the
; preceding block. Inputs from that block: v45 (squared length), (v27, v31,
; v35) (difference vector), v82 (fourth float read from LDS). Other inputs
; (v9, v19, v37, v67, v68, v73, s18, s19, s27, s42, s43, s50-s52) are set
; outside the visible code. Effects: v8 and v5 each accumulate one scalar, and
; a common scalar is added with opposite signs to (v69, v75, v78) and to
; (v32, v33, v34), scaled by the difference vector.
; NOTE(review): this looks like one pair interaction of a nonbonded kernel
; (r^-6 / r^-12 term plus an erf-based electrostatic term) -- confirm against
; the kernel source before relying on that reading.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7) so v_rsq_f32 never sees zero; v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v74 = bit 10 of v19 as a float (1.0 or 0.0); v[76:77] <- 8 bytes of LDS at v56 + 128.
v_lshrrev_b32_e32 v74, 10, v19
ds_read_b64 v[76:77], v56 offset:128
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2, i.e. 1/v45.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82 consumes the LDS value in v82 before v82 is reused for v74 * v80^3.
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
; v76/v77 from LDS are needed from here on.
s_waitcnt lgkmcnt(0)
; With a = v67 * v76 and b = v68 * v77 (v76/v77 as loaded):
;   v83 = a - b * v82
;   v8 += v74 * [s27 > v45] * ( b*(v82^2 + s43)*0x3daaaaaa(~1/12) + a*(v74*v80^3 + s42)*0xbe2aaaab(~-1/6) )
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1.0 where s27 > v45, else 0 (reused near the end of the block)
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
; Rational function of v76 = v9 * v45, evaluated with v81 = v76^2:
; denominator -> v84, numerator -> v81 (its last step overwrites v76^2).
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
; v76 = 1 / denominator
v_rcp_f32_e32 v76, v84
; x = v45 = s18 * v45 * v79, i.e. s18 times the square root of the clamped v45.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 1.25: first range threshold for the function of x evaluated below
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v37 * numerator / denominator + v74 * v79^3
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; ---- f(x): piecewise rational evaluation of x = v45, result returned in v45 ----
; NOTE(review): the thresholds 0.84375, 1.25, ~1/0.35, 6.0, 2^-28 and the
; constant 0x3f58560b match an fdlibm-style single-precision erf(); presumably
; this is an inlined erf -- confirm.
; v81 = x & s50 (presumably |x|, i.e. s50 = 0x7fffffff -- TODO confirm)
v_and_b32_e32 v81, s50, v45
; s[4:5] = (v81 < 1.25)
v_cmp_gt_f32_e64 s[4:5], v84, v81
; t = v84: v81^2 where v81 < 0.84375 (s[8:9]), else v81 - 1 where v81 < 1.25, else 1/v81^2
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; 0x6f800000 = 2^96: overflow guard for the scaled reciprocals below
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Interleaved Horner chains in t for v81 >= 1.25. vcc = (v81 < 0x4036db6e, ~2.857)
; picks between the two coefficient sets. First pair -> numerator in v85.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Second pair -> denominator tail in v86 (the v87 chain was seeded above).
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator for v81 < 1.25: replaces v85 where s[4:5] is set.
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Matching denominator tail: replaces v86 where s[4:5] is set.
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator tail for v81 < 0.84375: replaces v86 where s[8:9] is set.
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; denominator = 1 + t * tail
v_mad_f32 v86, v84, v86, 1.0
; v91 = x & s51 (s51 is set outside this block; presumably it drops the sign
; and the low mantissa bits -- TODO confirm). v92 = -v91^2 - 0.5625.
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
; Guarded division: v90 = 2^-32 (0x2f800000) if |denominator| > 2^96, else 1.0.
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; n = v93 = (int)(v92 * 0x3fb8aa3b (~log2 e) +/- 0.5), the 0.5 taking the sign of v92
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Numerator for v81 < 0.84375 (chain seeded in v87 above); its last step overwrites t in v84.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
; v84 = selected numerator / (v90 * denominator)
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
; First exponential, E1 -> v86 (the steps follow the usual exp() range reduction):
;   v86 = v92 - n * 0.693145752 (0xbf317180), v96 = v86 - n * ~9.058e-6 (0xb717f7d1)
;   v97 = v96 - v96^2 * P(v96^2), P a 5-coefficient polynomial built in v100
;   quotient v96 * v97 / (2 - v97) through the same 2^96 / 2^-32 guarded reciprocal
;   n << 23 is integer-added to the float bit pattern (scales the result by 2^n)
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range fix-ups on the argument v92: below 0xc2aeac4f (~-87.3) -> 0,
; not below 0x42b17218 (~88.7) -> +inf (0x7f800000), NaN -> v92 itself.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponent argument: v91 = (v91 - v81) * (v91 + v81) + q, q = v97 = v84 * v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
; n = v92 for the second exponential, same rounding rule as above
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = 0x3f58560b (~0.845) + v90 * v84: candidate result where s[4:5] is set
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
; Second exponential, E2 -> v85: same scheme, polynomial accumulated in place in v98..v103.
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = E2 * E1; v87 = 1 / (v86 * v81) with v86 = 2^-32 if v81 > 2^96 else 1.0
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; vcc = (v81 < 6.0); s[10:11] = (v81 < 2^-28, 0x31800000)
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
; Result selection, later selects overriding earlier ones:
;   1 - E2*E1 / v81, forced to 1.0 where v81 >= 6.0
;   v84 where s[4:5]; then (x & s52) is ORed in (presumably the sign of x -- TODO confirm s52)
;   x + x * q where s[8:9]
;   0.125 * (8*x + 0x3f8375d4 (~1.027) * x) where s[10:11]
;   x itself where x is NaN
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; ---- end of f(x) ----
; v45 = v79 * (v74 - f(x)) - s19 * v74; v5 += v45 * v51 (a few lines below)
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v82 * v77 * v80 (v77 = the 1.0/0.0 result of the s27 > v45 test above)
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v76 - v83 * v74; v51 = v74 * v83 - v76 * v51 (same quantity, opposite sign)
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; (v78, v75, v69) += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; (v34, v33, v32) += (v35, v31, v27) * v45
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB7_66: ; %Flow1240
; in Loop: Header=BB7_11 Depth=1
; Join point: re-enable the lanes that BB7_64 parked in s[34:35] when it
; applied its v45 < v51 test.
s_or_b64 exec, exec, s[34:35]
BB7_67: ; in Loop: Header=BB7_11 Depth=1
; Join point: re-enable the lanes that BB7_63 parked in s[12:13], then test
; bit 11 of v65 per lane. Lanes whose bit is clear are switched off and
; remembered in s[12:13] again; if no lane is left, branch to BB7_71, which
; ORs s[12:13] back into exec.
s_or_b64 exec, exec, s[12:13]
; v27 = (v65 >> 11) & 1
v_lshrrev_b32_e32 v27, 11, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = exec before the test; exec &= vcc
s_and_saveexec_b64 s[4:5], vcc
; s[12:13] = new exec XOR old exec = the lanes that were just disabled
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_71
s_cbranch_execz BB7_71
BB7_68: ; in Loop: Header=BB7_11 Depth=1
; Loads four floats from LDS, forms the difference vector (v27, v31, v35)
; against (v70, v71, v72) and its squared length in v45, then keeps only the
; lanes where v45 < s26 * w. w is 1.0 or 0.0 from the lane mask built in
; s[4:5]. Lanes failing the test are parked in s[34:35]; if none pass, branch
; to BB7_70.
; NOTE(review): presumably a pair-distance cutoff test combined with a
; pair-exclusion mask -- confirm against the kernel source.
s_mov_b32 m0, -1
; v[79:80] <- LDS[v55 + 48*8], v[81:82] <- LDS[v55 + 49*8] (offsets are in 8-byte units)
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v62 != v23), built over the next scalar ops
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; wait for the LDS read before touching v79..v82
s_waitcnt lgkmcnt(0)
; v31 = v80 - v71 (v_subrev computes src1 - src0)
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = w = 1.0 where the s[4:5] bit is set, else 0
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
; keep lanes with v45 < s26 * w; park the others in s[34:35]
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_70
s_cbranch_execz BB7_70
BB7_69: ; in Loop: Header=BB7_11 Depth=1
; Straight-line body for the lanes that passed the v45 < s26*w test in the
; preceding block. Inputs from that block: v45 (squared length), (v27, v31,
; v35) (difference vector), v82 (fourth float read from LDS). Other inputs
; (v9, v19, v37, v67, v68, v73, s18, s19, s27, s42, s43, s50-s52) are set
; outside the visible code. Effects: v8 and v5 each accumulate one scalar, and
; a common scalar is added with opposite signs to (v69, v75, v78) and to
; (v28, v29, v30), scaled by the difference vector.
; NOTE(review): this looks like one pair interaction of a nonbonded kernel
; (r^-6 / r^-12 term plus an erf-based electrostatic term) -- confirm against
; the kernel source before relying on that reading.
; v45 = max(v45, 0x34cd15ae) (~3.8e-7) so v_rsq_f32 never sees zero; v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v74 = bit 11 of v19 as a float (1.0 or 0.0); v[76:77] <- 8 bytes of LDS at v56 + 192.
v_lshrrev_b32_e32 v74, 11, v19
ds_read_b64 v[76:77], v56 offset:192
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2, i.e. 1/v45.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82 consumes the LDS value in v82 before v82 is reused for v74 * v80^3.
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
; v76/v77 from LDS are needed from here on.
s_waitcnt lgkmcnt(0)
; With a = v67 * v76 and b = v68 * v77 (v76/v77 as loaded):
;   v83 = a - b * v82
;   v8 += v74 * [s27 > v45] * ( b*(v82^2 + s43)*0x3daaaaaa(~1/12) + a*(v74*v80^3 + s42)*0xbe2aaaab(~-1/6) )
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1.0 where s27 > v45, else 0 (reused near the end of the block)
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
; Rational function of v76 = v9 * v45, evaluated with v81 = v76^2:
; denominator -> v84, numerator -> v81 (its last step overwrites v76^2).
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
; v76 = 1 / denominator
v_rcp_f32_e32 v76, v84
; x = v45 = s18 * v45 * v79, i.e. s18 times the square root of the clamped v45.
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 1.25: first range threshold for the function of x evaluated below
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v37 * numerator / denominator + v74 * v79^3
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; ---- f(x): piecewise rational evaluation of x = v45, result returned in v45 ----
; NOTE(review): the thresholds 0.84375, 1.25, ~1/0.35, 6.0, 2^-28 and the
; constant 0x3f58560b match an fdlibm-style single-precision erf(); presumably
; this is an inlined erf -- confirm.
; v81 = x & s50 (presumably |x|, i.e. s50 = 0x7fffffff -- TODO confirm)
v_and_b32_e32 v81, s50, v45
; s[4:5] = (v81 < 1.25)
v_cmp_gt_f32_e64 s[4:5], v84, v81
; t = v84: v81^2 where v81 < 0.84375 (s[8:9]), else v81 - 1 where v81 < 1.25, else 1/v81^2
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; 0x6f800000 = 2^96: overflow guard for the scaled reciprocals below
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Interleaved Horner chains in t for v81 >= 1.25. vcc = (v81 < 0x4036db6e, ~2.857)
; picks between the two coefficient sets. First pair -> numerator in v85.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Second pair -> denominator tail in v86 (the v87 chain was seeded above).
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator for v81 < 1.25: replaces v85 where s[4:5] is set.
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Matching denominator tail: replaces v86 where s[4:5] is set.
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator tail for v81 < 0.84375: replaces v86 where s[8:9] is set.
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; denominator = 1 + t * tail
v_mad_f32 v86, v84, v86, 1.0
; v91 = x & s51 (s51 is set outside this block; presumably it drops the sign
; and the low mantissa bits -- TODO confirm). v92 = -v91^2 - 0.5625.
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
; Guarded division: v90 = 2^-32 (0x2f800000) if |denominator| > 2^96, else 1.0.
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; n = v93 = (int)(v92 * 0x3fb8aa3b (~log2 e) +/- 0.5), the 0.5 taking the sign of v92
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Numerator for v81 < 0.84375 (chain seeded in v87 above); its last step overwrites t in v84.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
; v84 = selected numerator / (v90 * denominator)
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
; First exponential, E1 -> v86 (the steps follow the usual exp() range reduction):
;   v86 = v92 - n * 0.693145752 (0xbf317180), v96 = v86 - n * ~9.058e-6 (0xb717f7d1)
;   v97 = v96 - v96^2 * P(v96^2), P a 5-coefficient polynomial built in v100
;   quotient v96 * v97 / (2 - v97) through the same 2^96 / 2^-32 guarded reciprocal
;   n << 23 is integer-added to the float bit pattern (scales the result by 2^n)
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range fix-ups on the argument v92: below 0xc2aeac4f (~-87.3) -> 0,
; not below 0x42b17218 (~88.7) -> +inf (0x7f800000), NaN -> v92 itself.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponent argument: v91 = (v91 - v81) * (v91 + v81) + q, q = v97 = v84 * v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
; n = v92 for the second exponential, same rounding rule as above
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = 0x3f58560b (~0.845) + v90 * v84: candidate result where s[4:5] is set
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
; Second exponential, E2 -> v85: same scheme, polynomial accumulated in place in v98..v103.
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = E2 * E1; v87 = 1 / (v86 * v81) with v86 = 2^-32 if v81 > 2^96 else 1.0
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; vcc = (v81 < 6.0); s[10:11] = (v81 < 2^-28, 0x31800000)
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
; Result selection, later selects overriding earlier ones:
;   1 - E2*E1 / v81, forced to 1.0 where v81 >= 6.0
;   v84 where s[4:5]; then (x & s52) is ORed in (presumably the sign of x -- TODO confirm s52)
;   x + x * q where s[8:9]
;   0.125 * (8*x + 0x3f8375d4 (~1.027) * x) where s[10:11]
;   x itself where x is NaN
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; ---- end of f(x) ----
; v45 = v79 * (v74 - f(x)) - s19 * v74; v5 += v45 * v51 (a few lines below)
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
; v74 = v82 * v77 * v80 (v77 = the 1.0/0.0 result of the s27 > v45 test above)
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v76 - v83 * v74; v51 = v74 * v83 - v76 * v51 (same quantity, opposite sign)
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; (v78, v75, v69) += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; (v30, v29, v28) += (v35, v31, v27) * v45
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
; Re-join point: OR the lane mask held in s[34:35] back into exec.
; NOTE(review): the instruction that filled s[34:35] for this region is above
; the visible part of the listing; presumably it is the same
; s_and_saveexec_b64 / s_xor_b64 pair used by the later copies of this
; pattern - confirm.
BB7_70: ; %Flow1239
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[34:35]
; Re-join for the enclosing region (exec |= s[12:13]), then open the next
; guarded region: only lanes with bit 12 of v65 set stay active.
; NOTE(review): v65 presumably is a per-lane interaction bit mask with one bit
; per unrolled step (bits 12..15 are tested in this part of the listing) -
; confirm against the kernel source.
BB7_71: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 12) & 1) == 1, per lane.
v_lshrrev_b32_e32 v27, 12, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = the lanes just switched off,
; which the matching re-join (BB7_75) ORs back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_75
; Skip the whole region when no lane is left active.
s_cbranch_execz BB7_75
; BB7_72: runs for the lanes that passed the bit-12 guard in BB7_71. Loads
; four dwords from LDS, forms the difference vector (v27, v31, v35) against
; v70/v71/v72, and compares its squared length (v45) with a masked threshold.
; NOTE(review): the register roles (loaded x/y/z/q in v79..v82, partner
; position in v70..v72, squared cut-off in s26) are inferred from the data
; flow - confirm against the kernel source.
BB7_72: ; in Loop: Header=BB7_11 Depth=1
; m0 is set to -1 ahead of the LDS access (presumably the LDS bound these
; GCN parts require - confirm).
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 64*8 and v55 + 65*8 (ds_read2_b64 offsets are
; in 8-byte units), i.e. 16 consecutive bytes -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; Scalar predicate, interleaved with the vector work below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v61 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before its first use.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71, v27 = v79 - v70,
; v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
; v45 = v31^2 + v27^2 + v35^2.
v_mul_f32_e32 v45, v31, v31
; v51 = s26 where the predicate holds, else 0.0, so the compare below can
; never pass for lanes with a false predicate.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
; Keep only lanes with v45 < v51; the lanes switched off are parked in
; s[34:35] for the re-join at BB7_74.
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_74
s_cbranch_execz BB7_74
; BB7_73: per-pair arithmetic for the lanes that passed the distance test at
; the end of BB7_72. On entry v45 holds the squared length of the difference
; vector (v27, v31, v35). The block adds into the scalar sums v8 and v5 and
; into the vector accumulators v69/v75/v78 and v24/v25/v26.
; NOTE(review): this presumably is the Lennard-Jones + Ewald electrostatics
; pair term (with energies) of the GROMACS non-bonded kernel; physical
; meanings are not stated by the listing itself - confirm against the kernel
; source before relying on any such name.
; The instruction chains below are interleaved by the scheduler, so each note
; describes the group of instructions that follows it as a whole.
BB7_73: ; in Loop: Header=BB7_11 Depth=1
; Clamp v45 from below at 0x34cd15ae (~3.82e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v74 = 1.0 where bit 12 of v19 is set, else 0.0. An 8-byte LDS read
; (v56 + 256 -> v76:v77) is issued in between and waited on further down.
v_lshrrev_b32_e32 v74, 12, v19
ds_read_b64 v[76:77], v56 offset:256
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2, v81 = v74 * v80^2.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82, consuming the fourth dword loaded by the ds_read2_b64 in
; BB7_72; v82 is then reused for v74 * v80^3.
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
; Wait for v76:v77. Below, A = v67 * v76 and B = v68 * v77 (loaded values).
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
; v83 = A - v82 * B (kept for the final scale factor).
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
; v76 = A * (v82 + s42) * (-1/6) + B * (v82^2 + s43) * (1/12)
; (0xbe2aaaab ~ -0.1666667, 0x3daaaaaa ~ 0.0833333).
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
; vcc = (s27 > v45); v77 is recycled as the matching 1.0 / 0.0 flag.
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_cndmask_b32_e64 v77, 0, 1.0, vcc
; v8 += v76 * (v74 * v77)
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
; Rational function of u = v9 * v45 (v81 = u^2). Denominator (v84) and
; numerator (v81) are each built as poly(u^2) + u * poly(u^2) from literal
; coefficients; v76 = 1 / denominator.
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
; v45 = s18 * v45 * v79; with v79 = 1/sqrt(v45) that is approximately
; s18 * sqrt(v45).
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 0x3fa00000 = 1.25 (first range limit used below).
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v37 * numerator / denominator + v79 * (v74 * v80)
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; ---- Piecewise special-function evaluation of x = v45 (result ends in v45) ----
; NOTE(review): split points 0.84375 / 1.25 / ~2.857 / 6.0, the 0.8450629
; offset and the two-exponential tail match the fdlibm single-precision erf();
; presumably this is the inlined OpenCL erf built-in - confirm. s50, s51 and
; s52 are bit masks set up outside this block (presumably |x|, low-bit
; truncation and sign masks - confirm).
; v81 = v45 & s50; s[4:5] = (1.25 > v81).
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
; Polynomial argument v84: v81^2 where v81 < 0.84375 (0x3f580000), else
; v81 - 1.0 where v81 < 1.25, else 1 / v81^2. s[8:9] = (0.84375 > v81).
; 0x6f800000 = 2^96 is the threshold for the guarded reciprocals below.
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; All coefficient sets are evaluated unconditionally by Horner's rule in v84;
; the per-lane choice is made with v_cndmask afterwards.
; Two numerator polynomials (v85 and v86 chains); vcc = (0x4036db6e ~ 2.857143
; > v81) selects the v86 one into v85. A denominator chain in v87 already
; starts inside this group.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Matching denominator polynomials (v86 and v87 chains); the same vcc selects
; the v87 one into v86.
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator polynomial that replaces v85 where s[4:5] is set.
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Denominator polynomial that replaces v86 where s[4:5] is set.
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator polynomial that replaces v86 where s[8:9] is set.
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; v86 = 1.0 + v84 * (selected denominator polynomial).
v_mad_f32 v86, v84, v86, 1.0
; First exponential: argument v92 = -(v91 * v91) - 0.5625 with v91 = v45 & s51
; (0xbf100000 = -0.5625).
; Division guard: v90 = 2^-32 (0x2f800000) where |v86| > 2^96, else 1.0; v86
; is multiplied by v90 before v_rcp and the quotient by v90 again afterwards.
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; n (v93) = float->int convert of v92 * 1.442695 (0x3fb8aa3b) plus a bias of
; -0.5 where v92 < 0 and +0.5 otherwise.
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Numerator polynomial for the s[8:9] range (v87 chain -> v84), chosen over
; v85 where s[8:9] is set; interleaved with the guarded reciprocal of v86 and
; the int->float conversion of n into v87.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; v84 = selected numerator * reciprocal of the scaled denominator.
; Reduced argument: v86 = v92 + n * 0xbf317180 (~ -0.6931381), then
; v96 = v86 + n * 0xb717f7d1 (~ -9.058e-6); v97 = v96^2.
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
; Horner polynomial in v97 with coefficients v99, v98, v101, v102, v103
; (0x3e2aaaab ~ 1/6); afterwards v97 = v96 - v97 * poly.
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
; v96 = v96 * v97 / (2.0 - v97), using the same 2^96 / 2^-32 guarded
; reciprocal (scale factor in v100).
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
; v86 = 1.0 - ((-n * v95 - v96) - v86); then n << 23 is added to the float's
; bit pattern with an integer add (its carry-out lands in vcc, which the next
; compare overwrites).
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling on the argument v92: result 0 unless v92 >= 0xc2aeac4f
; (~ -87.3365), +inf (0x7f800000) unless v92 < 0x42b17218 (~ 88.7228), and
; v92 itself when it is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponential, same scheme, with argument
;   v91 = (v81 + v91) * (v91 - v81) + v97, where v97 = v84 * v90.
; v84 becomes v90 * v84 + 0.8450629 (0x3f58560b), the candidate result used
; where s[4:5] is set. This time the coefficient registers v98, v101, v102 and
; v103 are consumed as v_mac accumulators.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = product of the two exponentials; v81 = 1.0 - v85 / v81 (guarded
; reciprocal again) where 6.0 > v81, else 1.0.
; 0x40c00000 = 6.0; 0x31800000 = 2^-28; s[10:11] = (2^-28 > v81).
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
; Final range select, in this order: where s[4:5] is set take v84; OR in
; (v45 & s52); where s[8:9] is set take v45 + v97 * v45; where s[10:11] is set
; take 0.125 * (1.0270333 * v45 + 8.0 * v45)
; (0x3e000000, 0x3f8375d4, 0x41000000).
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
; A NaN input is passed through unchanged.
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; ---- Accumulation ----
; v5 += v51 * (v79 * (v74 - v45) - s19 * v74), v45 being the value just
; computed; meanwhile v74 is rebuilt as v82 * v77 * v80.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
; Pair scale factors: v45 = v51 * v76 - v83 * v74, and v51 = the same two
; products subtracted in the opposite order.
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; (v27, v31, v35) * v51 is added to v69 / v75 / v78 ...
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; ... and (v27, v31, v35) * v45 to v24 / v25 / v26.
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
; Re-join for the distance test in BB7_72: OR the lanes parked in s[34:35]
; by its s_and_saveexec_b64 / s_xor_b64 pair back into exec.
BB7_74: ; %Flow1238
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[34:35]
; Re-join for the bit-12 guard of BB7_71 (exec |= s[12:13]), then open the
; next guarded region: only lanes with bit 13 of v65 set stay active.
BB7_75: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 13) & 1) == 1, per lane.
v_lshrrev_b32_e32 v27, 13, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = the lanes just switched off,
; which the matching re-join (BB7_79) ORs back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_79
; Skip the whole region when no lane is left active.
s_cbranch_execz BB7_79
; BB7_76: runs for the lanes that passed the bit-13 guard in BB7_75. Loads
; four dwords from LDS, forms the difference vector (v27, v31, v35) against
; v70/v71/v72, and compares its squared length (v45) with a masked threshold.
; NOTE(review): the register roles (loaded x/y/z/q in v79..v82, partner
; position in v70..v72, squared cut-off in s26) are inferred from the data
; flow - confirm against the kernel source.
BB7_76: ; in Loop: Header=BB7_11 Depth=1
; m0 is set to -1 ahead of the LDS access (presumably the LDS bound these
; GCN parts require - confirm).
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 80*8 and v55 + 81*8 (ds_read2_b64 offsets are
; in 8-byte units), i.e. 16 consecutive bytes -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; Scalar predicate, interleaved with the vector work below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v60 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before its first use.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71, v27 = v79 - v70,
; v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
; v45 = v31^2 + v27^2 + v35^2.
v_mul_f32_e32 v45, v31, v31
; v51 = s26 where the predicate holds, else 0.0, so the compare below can
; never pass for lanes with a false predicate.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
; Keep only lanes with v45 < v51; the lanes switched off are parked in
; s[34:35] for the re-join at BB7_78.
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_78
s_cbranch_execz BB7_78
; BB7_77: per-pair arithmetic for the lanes that passed the distance test at
; the end of BB7_76. On entry v45 holds the squared length of the difference
; vector (v27, v31, v35). The block adds into the scalar sums v8 and v5 and
; into the vector accumulators v69/v75/v78 and v20/v21/v22.
; NOTE(review): this presumably is the Lennard-Jones + Ewald electrostatics
; pair term (with energies) of the GROMACS non-bonded kernel; physical
; meanings are not stated by the listing itself - confirm against the kernel
; source before relying on any such name.
; The instruction chains below are interleaved by the scheduler, so each note
; describes the group of instructions that follows it as a whole.
BB7_77: ; in Loop: Header=BB7_11 Depth=1
; Clamp v45 from below at 0x34cd15ae (~3.82e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v74 = 1.0 where bit 13 of v19 is set, else 0.0. An 8-byte LDS read
; (v56 + 320 -> v76:v77) is issued in between and waited on further down.
v_lshrrev_b32_e32 v74, 13, v19
ds_read_b64 v[76:77], v56 offset:320
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2, v81 = v74 * v80^2.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82, consuming the fourth dword loaded by the ds_read2_b64 in
; BB7_76; v82 is then reused for v74 * v80^3.
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
; Wait for v76:v77. Below, A = v67 * v76 and B = v68 * v77 (loaded values).
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
; v83 = A - v82 * B (kept for the final scale factor).
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
; v76 = A * (v82 + s42) * (-1/6) + B * (v82^2 + s43) * (1/12)
; (0xbe2aaaab ~ -0.1666667, 0x3daaaaaa ~ 0.0833333).
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
; vcc = (s27 > v45); v77 is recycled as the matching 1.0 / 0.0 flag.
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_cndmask_b32_e64 v77, 0, 1.0, vcc
; v8 += v76 * (v74 * v77)
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
; Rational function of u = v9 * v45 (v81 = u^2). Denominator (v84) and
; numerator (v81) are each built as poly(u^2) + u * poly(u^2) from literal
; coefficients; v76 = 1 / denominator.
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
; v45 = s18 * v45 * v79; with v79 = 1/sqrt(v45) that is approximately
; s18 * sqrt(v45).
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 0x3fa00000 = 1.25 (first range limit used below).
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v37 * numerator / denominator + v79 * (v74 * v80)
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; ---- Piecewise special-function evaluation of x = v45 (result ends in v45) ----
; NOTE(review): split points 0.84375 / 1.25 / ~2.857 / 6.0, the 0.8450629
; offset and the two-exponential tail match the fdlibm single-precision erf();
; presumably this is the inlined OpenCL erf built-in - confirm. s50, s51 and
; s52 are bit masks set up outside this block (presumably |x|, low-bit
; truncation and sign masks - confirm).
; v81 = v45 & s50; s[4:5] = (1.25 > v81).
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
; Polynomial argument v84: v81^2 where v81 < 0.84375 (0x3f580000), else
; v81 - 1.0 where v81 < 1.25, else 1 / v81^2. s[8:9] = (0.84375 > v81).
; 0x6f800000 = 2^96 is the threshold for the guarded reciprocals below.
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; All coefficient sets are evaluated unconditionally by Horner's rule in v84;
; the per-lane choice is made with v_cndmask afterwards.
; Two numerator polynomials (v85 and v86 chains); vcc = (0x4036db6e ~ 2.857143
; > v81) selects the v86 one into v85. A denominator chain in v87 already
; starts inside this group.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Matching denominator polynomials (v86 and v87 chains); the same vcc selects
; the v87 one into v86.
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator polynomial that replaces v85 where s[4:5] is set.
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Denominator polynomial that replaces v86 where s[4:5] is set.
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator polynomial that replaces v86 where s[8:9] is set.
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; v86 = 1.0 + v84 * (selected denominator polynomial).
v_mad_f32 v86, v84, v86, 1.0
; First exponential: argument v92 = -(v91 * v91) - 0.5625 with v91 = v45 & s51
; (0xbf100000 = -0.5625).
; Division guard: v90 = 2^-32 (0x2f800000) where |v86| > 2^96, else 1.0; v86
; is multiplied by v90 before v_rcp and the quotient by v90 again afterwards.
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; n (v93) = float->int convert of v92 * 1.442695 (0x3fb8aa3b) plus a bias of
; -0.5 where v92 < 0 and +0.5 otherwise.
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Numerator polynomial for the s[8:9] range (v87 chain -> v84), chosen over
; v85 where s[8:9] is set; interleaved with the guarded reciprocal of v86 and
; the int->float conversion of n into v87.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; v84 = selected numerator * reciprocal of the scaled denominator.
; Reduced argument: v86 = v92 + n * 0xbf317180 (~ -0.6931381), then
; v96 = v86 + n * 0xb717f7d1 (~ -9.058e-6); v97 = v96^2.
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
; Horner polynomial in v97 with coefficients v99, v98, v101, v102, v103
; (0x3e2aaaab ~ 1/6); afterwards v97 = v96 - v97 * poly.
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
; v96 = v96 * v97 / (2.0 - v97), using the same 2^96 / 2^-32 guarded
; reciprocal (scale factor in v100).
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
; v86 = 1.0 - ((-n * v95 - v96) - v86); then n << 23 is added to the float's
; bit pattern with an integer add (its carry-out lands in vcc, which the next
; compare overwrites).
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling on the argument v92: result 0 unless v92 >= 0xc2aeac4f
; (~ -87.3365), +inf (0x7f800000) unless v92 < 0x42b17218 (~ 88.7228), and
; v92 itself when it is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponential, same scheme, with argument
;   v91 = (v81 + v91) * (v91 - v81) + v97, where v97 = v84 * v90.
; v84 becomes v90 * v84 + 0.8450629 (0x3f58560b), the candidate result used
; where s[4:5] is set. This time the coefficient registers v98, v101, v102 and
; v103 are consumed as v_mac accumulators.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = product of the two exponentials; v81 = 1.0 - v85 / v81 (guarded
; reciprocal again) where 6.0 > v81, else 1.0.
; 0x40c00000 = 6.0; 0x31800000 = 2^-28; s[10:11] = (2^-28 > v81).
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
; Final range select, in this order: where s[4:5] is set take v84; OR in
; (v45 & s52); where s[8:9] is set take v45 + v97 * v45; where s[10:11] is set
; take 0.125 * (1.0270333 * v45 + 8.0 * v45)
; (0x3e000000, 0x3f8375d4, 0x41000000).
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
; A NaN input is passed through unchanged.
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; ---- Accumulation ----
; v5 += v51 * (v79 * (v74 - v45) - s19 * v74), v45 being the value just
; computed; meanwhile v74 is rebuilt as v82 * v77 * v80.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
; Pair scale factors: v45 = v51 * v76 - v83 * v74, and v51 = the same two
; products subtracted in the opposite order.
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; (v27, v31, v35) * v51 is added to v69 / v75 / v78 ...
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; ... and (v27, v31, v35) * v45 to v20 / v21 / v22.
v_mad_f32 v22, v35, v45, v22
v_mad_f32 v21, v31, v45, v21
v_mac_f32_e32 v20, v27, v45
; Re-join for the distance test in BB7_76: OR the lanes parked in s[34:35]
; by its s_and_saveexec_b64 / s_xor_b64 pair back into exec.
BB7_78: ; %Flow1237
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[34:35]
; Re-join for the bit-13 guard of BB7_75 (exec |= s[12:13]), then open the
; next guarded region: only lanes with bit 14 of v65 set stay active.
BB7_79: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 14) & 1) == 1, per lane.
v_lshrrev_b32_e32 v27, 14, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = the lanes just switched off,
; which the matching re-join (BB7_83) ORs back in.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_83
; Skip the whole region when no lane is left active.
s_cbranch_execz BB7_83
; BB7_80: runs for the lanes that passed the bit-14 guard in BB7_79. Loads
; four dwords from LDS, forms the difference vector (v27, v31, v35) against
; v70/v71/v72, and compares its squared length (v45) with a masked threshold.
; NOTE(review): the register roles (loaded x/y/z/q in v79..v82, partner
; position in v70..v72, squared cut-off in s26) are inferred from the data
; flow - confirm against the kernel source.
BB7_80: ; in Loop: Header=BB7_11 Depth=1
; m0 is set to -1 ahead of the LDS access (presumably the LDS bound these
; GCN parts require - confirm).
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 96*8 and v55 + 97*8 (ds_read2_b64 offsets are
; in 8-byte units), i.e. 16 consecutive bytes -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:96 offset1:97
; Scalar predicate, interleaved with the vector work below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v59 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v59, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before its first use.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v31 = v80 - v71, v27 = v79 - v70,
; v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
; v45 = v31^2 + v27^2 + v35^2.
v_mul_f32_e32 v45, v31, v31
; v51 = s26 where the predicate holds, else 0.0, so the compare below can
; never pass for lanes with a false predicate.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
v_mul_f32_e32 v51, s26, v51
; Keep only lanes with v45 < v51; the lanes switched off are parked in
; s[34:35] for the re-join at BB7_82.
v_cmp_lt_f32_e32 vcc, v45, v51
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_82
s_cbranch_execz BB7_82
; BB7_81: per-pair arithmetic for the lanes that passed the distance test at
; the end of BB7_80. On entry v45 holds the squared length of the difference
; vector (v27, v31, v35). The block adds into the scalar sums v8 and v5 and
; into the vector accumulators v69/v75/v78 and v16/v17/v18.
; NOTE(review): this presumably is the Lennard-Jones + Ewald electrostatics
; pair term (with energies) of the GROMACS non-bonded kernel; physical
; meanings are not stated by the listing itself - confirm against the kernel
; source before relying on any such name.
; The instruction chains below are interleaved by the scheduler, so each note
; describes the group of instructions that follows it as a whole.
BB7_81: ; in Loop: Header=BB7_11 Depth=1
; Clamp v45 from below at 0x34cd15ae (~3.82e-7), then v79 = 1/sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
; v74 = 1.0 where bit 14 of v19 is set, else 0.0. An 8-byte LDS read
; (v56 + 384 -> v76:v77) is issued in between and waited on further down.
v_lshrrev_b32_e32 v74, 14, v19
ds_read_b64 v[76:77], v56 offset:384
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2, v81 = v74 * v80^2.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82, consuming the fourth dword loaded by the ds_read2_b64 in
; BB7_80; v82 is then reused for v74 * v80^3.
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
; Wait for v76:v77. Below, A = v67 * v76 and B = v68 * v77 (loaded values).
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
; v83 = A - v82 * B (kept for the final scale factor).
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
; v76 = A * (v82 + s42) * (-1/6) + B * (v82^2 + s43) * (1/12)
; (0xbe2aaaab ~ -0.1666667, 0x3daaaaaa ~ 0.0833333).
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
; vcc = (s27 > v45); v77 is recycled as the matching 1.0 / 0.0 flag.
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_cndmask_b32_e64 v77, 0, 1.0, vcc
; v8 += v76 * (v74 * v77)
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
; Rational function of u = v9 * v45 (v81 = u^2). Denominator (v84) and
; numerator (v81) are each built as poly(u^2) + u * poly(u^2) from literal
; coefficients; v76 = 1 / denominator.
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
; v45 = s18 * v45 * v79; with v79 = 1/sqrt(v45) that is approximately
; s18 * sqrt(v45).
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 0x3fa00000 = 1.25 (first range limit used below).
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v37 * numerator / denominator + v79 * (v74 * v80)
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; ---- Piecewise special-function evaluation of x = v45 (result ends in v45) ----
; NOTE(review): split points 0.84375 / 1.25 / ~2.857 / 6.0, the 0.8450629
; offset and the two-exponential tail match the fdlibm single-precision erf();
; presumably this is the inlined OpenCL erf built-in - confirm. s50, s51 and
; s52 are bit masks set up outside this block (presumably |x|, low-bit
; truncation and sign masks - confirm).
; v81 = v45 & s50; s[4:5] = (1.25 > v81).
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
; Polynomial argument v84: v81^2 where v81 < 0.84375 (0x3f580000), else
; v81 - 1.0 where v81 < 1.25, else 1 / v81^2. s[8:9] = (0.84375 > v81).
; 0x6f800000 = 2^96 is the threshold for the guarded reciprocals below.
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; All coefficient sets are evaluated unconditionally by Horner's rule in v84;
; the per-lane choice is made with v_cndmask afterwards.
; Two numerator polynomials (v85 and v86 chains); vcc = (0x4036db6e ~ 2.857143
; > v81) selects the v86 one into v85. A denominator chain in v87 already
; starts inside this group.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Matching denominator polynomials (v86 and v87 chains); the same vcc selects
; the v87 one into v86.
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Numerator polynomial that replaces v85 where s[4:5] is set.
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Denominator polynomial that replaces v86 where s[4:5] is set.
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Denominator polynomial that replaces v86 where s[8:9] is set.
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; v86 = 1.0 + v84 * (selected denominator polynomial).
v_mad_f32 v86, v84, v86, 1.0
; First exponential: argument v92 = -(v91 * v91) - 0.5625 with v91 = v45 & s51
; (0xbf100000 = -0.5625).
; Division guard: v90 = 2^-32 (0x2f800000) where |v86| > 2^96, else 1.0; v86
; is multiplied by v90 before v_rcp and the quotient by v90 again afterwards.
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; n (v93) = float->int convert of v92 * 1.442695 (0x3fb8aa3b) plus a bias of
; -0.5 where v92 < 0 and +0.5 otherwise.
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Numerator polynomial for the s[8:9] range (v87 chain -> v84), chosen over
; v85 where s[8:9] is set; interleaved with the guarded reciprocal of v86 and
; the int->float conversion of n into v87.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; v84 = selected numerator * reciprocal of the scaled denominator.
; Reduced argument: v86 = v92 + n * 0xbf317180 (~ -0.6931381), then
; v96 = v86 + n * 0xb717f7d1 (~ -9.058e-6); v97 = v96^2.
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
; Horner polynomial in v97 with coefficients v99, v98, v101, v102, v103
; (0x3e2aaaab ~ 1/6); afterwards v97 = v96 - v97 * poly.
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
; v96 = v96 * v97 / (2.0 - v97), using the same 2^96 / 2^-32 guarded
; reciprocal (scale factor in v100).
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
; v86 = 1.0 - ((-n * v95 - v96) - v86); then n << 23 is added to the float's
; bit pattern with an integer add (its carry-out lands in vcc, which the next
; compare overwrites).
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
; Range handling on the argument v92: result 0 unless v92 >= 0xc2aeac4f
; (~ -87.3365), +inf (0x7f800000) unless v92 < 0x42b17218 (~ 88.7228), and
; v92 itself when it is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; Second exponential, same scheme, with argument
;   v91 = (v81 + v91) * (v91 - v81) + v97, where v97 = v84 * v90.
; v84 becomes v90 * v84 + 0.8450629 (0x3f58560b), the candidate result used
; where s[4:5] is set. This time the coefficient registers v98, v101, v102 and
; v103 are consumed as v_mac accumulators.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; v85 = product of the two exponentials; v81 = 1.0 - v85 / v81 (guarded
; reciprocal again) where 6.0 > v81, else 1.0.
; 0x40c00000 = 6.0; 0x31800000 = 2^-28; s[10:11] = (2^-28 > v81).
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
; Final range select, in this order: where s[4:5] is set take v84; OR in
; (v45 & s52); where s[8:9] is set take v45 + v97 * v45; where s[10:11] is set
; take 0.125 * (1.0270333 * v45 + 8.0 * v45)
; (0x3e000000, 0x3f8375d4, 0x41000000).
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
; A NaN input is passed through unchanged.
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; ---- Accumulation ----
; v5 += v51 * (v79 * (v74 - v45) - s19 * v74), v45 being the value just
; computed; meanwhile v74 is rebuilt as v82 * v77 * v80.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
; Pair scale factors: v45 = v51 * v76 - v83 * v74, and v51 = the same two
; products subtracted in the opposite order.
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; (v27, v31, v35) * v51 is added to v69 / v75 / v78 ...
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; ... and (v27, v31, v35) * v45 to v16 / v17 / v18.
v_mad_f32 v18, v35, v45, v18
v_mad_f32 v17, v31, v45, v17
v_mac_f32_e32 v16, v27, v45
; Re-join for the distance test in BB7_80: OR the lanes parked in s[34:35]
; by its s_and_saveexec_b64 / s_xor_b64 pair back into exec.
BB7_82: ; %Flow1236
; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[34:35]
; Re-join for the bit-14 guard of BB7_79 (exec |= s[12:13]), then open the
; next guarded region: only lanes with bit 15 of v65 set stay active.
BB7_83: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[12:13]
; vcc = ((v65 >> 15) & 1) == 1, per lane.
v_lshrrev_b32_e32 v27, 15, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; s[4:5] = previous exec, exec &= vcc; s[12:13] = the lanes just switched off
; (presumably ORed back in at BB7_87, which is outside the visible listing).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_87
; Skip the whole region when no lane is left active.
s_cbranch_execz BB7_87
; BB7_84: runs for the lanes that passed the bit-15 guard in BB7_83. Same
; shape as the earlier distance tests, but with a different register
; assignment: the difference vector lands in (v23, v27, v31), its squared
; length in v35 and the masked threshold in v45. v23 is overwritten here after
; its last use as a compare operand.
; NOTE(review): the register roles (loaded x/y/z/q in v79..v82, partner
; position in v70..v72, squared cut-off in s26) are inferred from the data
; flow - confirm against the kernel source.
BB7_84: ; in Loop: Header=BB7_11 Depth=1
; m0 is set to -1 ahead of the LDS access (presumably the LDS bound these
; GCN parts require - confirm).
s_mov_b32 m0, -1
; Two 64-bit LDS reads at v55 + 112*8 and v55 + 113*8 (ds_read2_b64 offsets
; are in 8-byte units), i.e. 16 consecutive bytes -> v79, v80, v81, v82.
ds_read2_b64 v[79:82], v55 offset0:112 offset1:113
; Scalar predicate, interleaved with the vector work below:
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v38 != v23)
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v38, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; Wait for the LDS data before its first use.
s_waitcnt lgkmcnt(0)
; v_subrev computes src1 - src0: v27 = v80 - v71, v23 = v79 - v70,
; v31 = v81 - v72.
v_subrev_f32_e32 v27, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v23, v70, v79
; v35 = v27^2 + v23^2 + v31^2.
v_mul_f32_e32 v35, v27, v27
; v45 = s26 where the predicate holds, else 0.0, so the compare below can
; never pass for lanes with a false predicate.
v_cndmask_b32_e64 v45, 0, 1.0, s[4:5]
v_subrev_f32_e32 v31, v72, v81
v_mac_f32_e32 v35, v23, v23
v_mac_f32_e32 v35, v31, v31
v_mul_f32_e32 v45, s26, v45
; Keep only lanes with v35 < v45; the lanes switched off are parked in
; s[34:35] (presumably for a re-join at BB7_86, outside the visible listing).
v_cmp_lt_f32_e32 vcc, v35, v45
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_86
s_cbranch_execz BB7_86
BB7_85: ; in Loop: Header=BB7_11 Depth=1
v_lshrrev_b32_e32 v51, 15, v19
v_and_b32_e32 v51, 1, v51
v_max_f32_e32 v35, 0x34cd15ae, v35
v_cmp_eq_u32_e32 vcc, 1, v51
v_rsq_f32_e32 v51, v35
v_mul_f32_e32 v45, v73, v82
v_cndmask_b32_e64 v72, 0, 1.0, vcc
s_mov_b32 m0, -1
v_mul_f32_e32 v73, v51, v51
v_mul_f32_e32 v70, v73, v73
v_mul_f32_e32 v74, v72, v70
ds_read_b64 v[70:71], v56 offset:448
v_cmp_gt_f32_e32 vcc, s27, v35
v_mov_b32_e32 v77, 0x3c739487
v_mov_b32_e32 v79, 0x35c55945
v_mov_b32_e32 v80, 0x3fa00000
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v68, v68, v71
v_mul_f32_e32 v71, v73, v74
v_mul_f32_e32 v76, v71, v68
v_mad_f32 v76, v70, v67, -v76
v_mul_f32_e32 v67, v67, v70
v_mad_f32 v70, v71, v71, s43
v_mul_f32_e32 v68, v70, v68
v_mad_f32 v70, v74, v73, s42
v_mul_f32_e32 v70, 0xbe2aaaab, v70
v_mul_f32_e32 v67, v67, v70
v_mac_f32_e32 v67, 0x3daaaaaa, v68
v_cndmask_b32_e64 v68, 0, 1.0, vcc
v_mul_f32_e32 v70, v72, v68
v_mac_f32_e32 v8, v67, v70
v_mul_f32_e32 v67, v9, v35
v_mul_f32_e32 v70, v67, v67
v_mov_b32_e32 v74, 0x3a92b707
v_madak_f32_e32 v74, v74, v70, 0x3ded3cb2
v_madak_f32_e32 v77, v77, v70, 0x3f01e2bc
v_mad_f32 v74, v74, v70, 1.0
v_mac_f32_e32 v74, v67, v77
v_mov_b32_e32 v77, 0xb2951928
v_madak_f32_e32 v77, v77, v70, 0xb85ffb93
v_madak_f32_e32 v79, v79, v70, 0x3a83ca0c
v_madak_f32_e32 v77, v77, v70, 0xbc9ded90
v_madak_f32_e32 v79, v79, v70, 0x3d8eaf3b
v_madak_f32_e32 v70, v77, v70, 0xbf409397
v_mac_f32_e32 v70, v67, v79
v_rcp_f32_e32 v67, v74
v_mul_f32_e32 v35, s18, v35
v_mul_f32_e32 v35, v51, v35
v_mov_b32_e32 v81, 0xbd777f97
v_mul_f32_e32 v67, v37, v67
v_mul_f32_e32 v67, v70, v67
v_mul_f32_e32 v70, v72, v73
v_mac_f32_e32 v67, v51, v70
v_and_b32_e32 v70, s50, v35
v_mul_f32_e32 v74, v70, v70
v_rcp_f32_e32 v77, v74
v_cmp_gt_f32_e64 s[4:5], v80, v70
v_add_f32_e32 v79, -1.0, v70
v_mov_b32_e32 v80, 0xc11d077e
v_cndmask_b32_e64 v77, v77, v79, s[4:5]
v_mov_b32_e32 v79, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v79, v70
v_cndmask_b32_e64 v74, v77, v74, s[8:9]
v_mov_b32_e32 v77, 0xc3f1c275
v_madak_f32_e32 v80, v80, v74, 0xc2a2932b
v_madak_f32_e32 v77, v77, v74, 0xc480230b
v_mov_b32_e32 v79, 0xc1b38712
v_madak_f32_e32 v80, v74, v80, 0xc3389ae7
v_madak_f32_e32 v81, v81, v74, 0x40d23f7c
v_madak_f32_e32 v77, v74, v77, 0xc41f6441
v_madak_f32_e32 v79, v79, v74, 0x43ed43a7
v_madak_f32_e32 v80, v74, v80, 0xc322658c
v_madak_f32_e32 v81, v74, v81, 0x42d9451f
v_madak_f32_e32 v77, v74, v77, 0xc320a2ea
v_madak_f32_e32 v79, v74, v79, 0x451f90ce
v_madak_f32_e32 v80, v74, v80, 0xc2798057
v_madak_f32_e32 v81, v74, v81, 0x43d6810b
v_madak_f32_e32 v77, v74, v77, 0xc18e104b
v_madak_f32_e32 v79, v74, v79, 0x4547fdbb
v_madak_f32_e32 v80, v74, v80, 0xc128f022
v_madak_f32_e32 v81, v74, v81, 0x442158c9
v_madak_f32_e32 v77, v74, v77, 0xbf4c9dd4
v_madak_f32_e32 v79, v74, v79, 0x44c01759
v_madak_f32_e32 v80, v74, v80, 0xbf31a0b7
v_madak_f32_e32 v81, v74, v81, 0x43d9486f
v_mov_b32_e32 v82, 0x4036db6e
v_madak_f32_e32 v79, v74, v79, 0x43a2e571
v_madak_f32_e32 v81, v74, v81, 0x4309a863
v_madak_f32_e32 v77, v74, v77, 0xbc21a092
v_madak_f32_e32 v80, v74, v80, 0xbc21a093
v_cmp_gt_f32_e32 vcc, v82, v70
v_cndmask_b32_e32 v77, v77, v80, vcc
v_mov_b32_e32 v80, 0xbb0df9c0
v_madak_f32_e32 v79, v74, v79, 0x41f2b459
v_madak_f32_e32 v81, v74, v81, 0x419d35ce
v_madak_f32_e32 v80, v80, v74, 0x3d1151b3
v_cndmask_b32_e32 v79, v79, v81, vcc
v_mov_b32_e32 v81, 0x3c445aa3
v_madak_f32_e32 v80, v74, v80, 0xbde31cc2
v_madak_f32_e32 v81, v81, v74, 0x3c5f6e13
v_madak_f32_e32 v80, v74, v80, 0x3ea2fe54
v_madak_f32_e32 v81, v74, v81, 0x3e013307
v_madak_f32_e32 v80, v74, v80, 0xbebe9208
v_madak_f32_e32 v81, v74, v81, 0x3d931ae7
v_madak_f32_e32 v80, v74, v80, 0x3ed46805
v_madak_f32_e32 v81, v74, v81, 0x3f0a5785
v_madak_f32_e32 v80, v74, v80, 0xbb1acdc6
v_madak_f32_e32 v81, v74, v81, 0x3dd9f331
v_cndmask_b32_e64 v77, v77, v80, s[4:5]
v_mov_b32_e32 v80, 0xb7c756b1
v_cndmask_b32_e64 v79, v79, v81, s[4:5]
v_mov_b32_e32 v81, 0xb684e21a
v_madak_f32_e32 v80, v80, v74, 0xbbbd1489
v_madak_f32_e32 v81, v81, v74, 0x390aee49
v_madak_f32_e32 v80, v74, v80, 0xbce9528f
v_madak_f32_e32 v81, v74, v81, 0x3ba68116
v_madak_f32_e32 v80, v74, v80, 0xbea66beb
v_madak_f32_e32 v81, v74, v81, 0x3d852a63
v_madak_f32_e32 v80, v74, v80, 0x3e0375d4
v_madak_f32_e32 v81, v74, v81, 0x3ecbbbce
v_cndmask_b32_e64 v79, v79, v81, s[8:9]
v_cndmask_b32_e64 v77, v77, v80, s[8:9]
v_and_b32_e32 v80, s51, v35
v_mov_b32_e32 v81, 0xbf100000
v_mad_f32 v81, v80, -v80, v81
v_cmp_gt_f32_e64 s[10:11], 0, v81
v_cndmask_b32_e64 v82, 0.5, -0.5, s[10:11]
v_mov_b32_e32 v83, 0x3fb8aa3b
v_mac_f32_e32 v82, v83, v81
v_cvt_i32_f32_e32 v82, v82
v_mov_b32_e32 v87, 0xbf317180
v_mov_b32_e32 v89, 0xb717f7d1
v_mov_b32_e32 v92, 0xb5ddea0e
v_cvt_f32_i32_e32 v86, v82
v_mov_b32_e32 v93, 0x3331bb4c
v_mov_b32_e32 v95, 0x388ab355
v_mov_b32_e32 v96, 0xbb360b61
v_mad_f32 v88, v87, v86, v81
v_mad_f32 v90, v89, v86, v88
v_mul_f32_e32 v91, v90, v90
v_mad_f32 v94, v93, v91, v92
v_mad_f32 v94, v94, v91, v95
v_mad_f32 v94, v94, v91, v96
v_mov_b32_e32 v97, 0x3e2aaaab
v_mad_f32 v94, v94, v91, v97
v_mad_f32 v91, -v91, v94, v90
v_mad_f32 v74, v74, v79, 1.0
v_mov_b32_e32 v79, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v74|, v79
v_mov_b32_e32 v84, 0x2f800000
v_sub_f32_e32 v94, 2.0, v91
v_cndmask_b32_e32 v85, 1.0, v84, vcc
v_cmp_gt_f32_e64 vcc, |v94|, v79
v_cndmask_b32_e32 v98, 1.0, v84, vcc
v_mul_f32_e32 v94, v98, v94
v_rcp_f32_e32 v94, v94
v_mul_f32_e32 v74, v85, v74
v_mul_f32_e32 v90, v91, v90
v_rcp_f32_e32 v74, v74
v_mul_f32_e32 v90, v94, v90
v_mul_f32_e32 v90, v90, v98
v_mad_f32 v86, -v86, v89, -v90
v_subrev_f32_e32 v86, v88, v86
v_mul_f32_e32 v74, v74, v77
v_subrev_f32_e32 v88, v70, v80
v_mul_f32_e32 v77, v74, v85
v_add_f32_e32 v80, v70, v80
v_sub_f32_e32 v86, 1.0, v86
v_lshlrev_b32_e32 v82, 23, v82
v_add_i32_e32 v82, vcc, v86, v82
v_mad_f32 v80, v80, v88, v77
v_cmp_gt_f32_e32 vcc, 0, v80
v_cndmask_b32_e64 v88, 0.5, -0.5, vcc
v_mac_f32_e32 v88, v83, v80
v_cvt_i32_f32_e32 v83, v88
v_mov_b32_e32 v86, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v81, v86
v_mov_b32_e32 v88, 0x42b17218
v_cvt_f32_i32_e32 v90, v83
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e32 vcc, v81, v88
v_mov_b32_e32 v91, 0x7f800000
v_mad_f32 v87, v87, v90, v80
v_mad_f32 v94, v89, v90, v87
v_mul_f32_e32 v98, v94, v94
v_mac_f32_e32 v92, v93, v98
v_mac_f32_e32 v95, v92, v98
v_mac_f32_e32 v96, v95, v98
v_mac_f32_e32 v97, v96, v98
v_mad_f32 v92, -v98, v97, v94
v_sub_f32_e32 v93, 2.0, v92
v_cndmask_b32_e32 v82, v91, v82, vcc
v_cmp_gt_f32_e64 vcc, |v93|, v79
v_cndmask_b32_e32 v95, 1.0, v84, vcc
v_mul_f32_e32 v93, v95, v93
v_rcp_f32_e32 v93, v93
v_cmp_u_f32_e32 vcc, v81, v81
v_cndmask_b32_e32 v81, v82, v81, vcc
v_mul_f32_e32 v82, v92, v94
v_mul_f32_e32 v82, v93, v82
v_mul_f32_e32 v82, v82, v95
v_mad_f32 v82, -v90, v89, -v82
v_subrev_f32_e32 v82, v87, v82
v_sub_f32_e32 v82, 1.0, v82
v_lshlrev_b32_e32 v83, 23, v83
v_add_i32_e32 v82, vcc, v82, v83
v_cmp_ge_f32_e32 vcc, v80, v86
v_cndmask_b32_e32 v82, 0, v82, vcc
v_cmp_lt_f32_e64 s[10:11], v80, v88
v_cndmask_b32_e64 v82, v91, v82, s[10:11]
v_cmp_u_f32_e32 vcc, v80, v80
v_cndmask_b32_e32 v80, v82, v80, vcc
v_cmp_gt_f32_e64 vcc, |v70|, v79
v_cndmask_b32_e32 v79, 1.0, v84, vcc
v_mul_f32_e32 v80, v80, v81
v_mul_f32_e32 v81, v79, v70
v_rcp_f32_e32 v81, v81
v_mul_f32_e32 v80, v81, v80
v_mad_f32 v79, -v79, v80, 1.0
v_mov_b32_e32 v80, 0x40c00000
v_cmp_gt_f32_e32 vcc, v80, v70
v_mov_b32_e32 v80, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v80, v70
v_madak_f32_e32 v70, v85, v74, 0x3f58560b
v_cndmask_b32_e32 v74, 1.0, v79, vcc
v_cndmask_b32_e64 v70, v74, v70, s[4:5]
v_and_b32_e32 v74, s52, v35
v_or_b32_e32 v70, v74, v70
v_mad_f32 v74, v77, v35, v35
v_cndmask_b32_e64 v70, v70, v74, s[8:9]
v_mul_f32_e32 v74, 0x3f8375d4, v35
v_mac_f32_e32 v74, 0x41000000, v35
v_mul_f32_e32 v74, 0x3e000000, v74
v_cndmask_b32_e64 v70, v70, v74, s[10:11]
v_cmp_u_f32_e32 vcc, v35, v35
v_cndmask_b32_e32 v35, v70, v35, vcc
v_subrev_f32_e32 v35, v35, v72
v_mul_f32_e32 v70, s19, v72
v_mad_f32 v35, v51, v35, -v70
v_mul_f32_e32 v51, v68, v73
v_mul_f32_e32 v51, v71, v51
v_mac_f32_e32 v5, v35, v45
v_mul_f32_e32 v35, v76, v51
v_mad_f32 v35, v45, v67, -v35
v_mul_f32_e32 v45, v67, v45
v_mad_f32 v45, v51, v76, -v45
v_mad_f32 v78, v31, v45, v78
v_mad_f32 v75, v27, v45, v75
v_mac_f32_e32 v69, v23, v45
v_mad_f32 v15, v31, v35, v15
v_mad_f32 v14, v27, v35, v14
v_mac_f32_e32 v13, v23, v35
BB7_86: ; %Flow1235
; in Loop: Header=BB7_11 Depth=1
; Control-flow join: re-enable the lanes recorded in s[34:35], then (BB7_87)
; the lanes recorded in s[12:13].
s_or_b64 exec, exec, s[34:35]
BB7_87: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[12:13]
; M0 is set to -1 before the DS (LDS) instructions below.
; NOTE(review): on this ISA generation DS ops are bounds-checked against M0 - confirm in the ISA manual.
s_mov_b32 m0, -1
; vcc = (3 > v2): only lanes with v2 in {.., 0, 1, 2} enter the region up to BB7_93.
v_cmp_gt_i32_e32 vcc, 3, v2
; Every active lane stores its three per-lane accumulators v69, v75, v78 to the
; LDS addresses held in v6, v7, v12.
ds_write_b32 v6, v69
ds_write_b32 v7, v75
ds_write_b32 v12, v78
; exec &= vcc; s[4:5] ends up holding the lanes that were switched off, so the
; s_or_b64 at BB7_93 can restore them.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[4:5], exec, s[4:5]
; Wait for the three LDS writes above to retire before any lane reads them back.
s_waitcnt lgkmcnt(0)
; mask branch BB7_93
s_cbranch_execz BB7_93
BB7_88: ; in Loop: Header=BB7_11 Depth=1
; Sums float values read back from LDS into v23.
; LDS byte address v31 = s15 + 4 * (v11 + (v2 << 6)).
v_lshlrev_b32_e32 v27, 6, v2
v_add_i32_e32 v23, vcc, v11, v27
v_lshlrev_b32_e32 v23, 2, v23
v_add_i32_e32 v31, vcc, s15, v23
s_mov_b32 m0, -1
; v23 = first term (dword at v31).
ds_read_b32 v23, v31
; Guard for BB7_89: vcc = ((v11 | 1) < (v11 + 8)).
v_add_i32_e32 v35, vcc, 8, v11
v_or_b32_e32 v45, 1, v11
v_cmp_lt_i32_e32 vcc, v45, v35
; exec &= vcc; s[8:9] keeps the lanes switched off here (restored at BB7_90).
s_and_saveexec_b64 s[8:9], vcc
s_xor_b64 s[8:9], exec, s[8:9]
s_waitcnt lgkmcnt(0)
; mask branch BB7_90
s_cbranch_execz BB7_90
BB7_89: ; in Loop: Header=BB7_11 Depth=1
s_mov_b32 m0, -1
; ds_read2_b32 offsets are in dword units: these are the dwords at v31 + 4 and v31 + 8.
ds_read2_b32 v[67:68], v31 offset0:1 offset1:2
; Second base address v27 = s15 + 4 * ((v11 | 3) + (v2 << 6)).
; NOTE(review): if the low two bits of v11 are zero this is v31 + 12, so the
; offset0:2/offset1:3 read below fetches dwords 5 and 6 of the same run - confirm.
v_or_b32_e32 v35, 3, v11
v_add_i32_e32 v27, vcc, v35, v27
v_lshlrev_b32_e32 v27, 2, v27
; Dwords at v31 + 12 and v31 + 16.
ds_read2_b32 v[69:70], v31 offset0:3 offset1:4
s_waitcnt lgkmcnt(0)
v_add_f32_e32 v23, v23, v67
v_add_i32_e32 v27, vcc, s15, v27
v_add_f32_e32 v23, v68, v23
ds_read2_b32 v[67:68], v27 offset0:2 offset1:3
; Dword at byte offset 28 from v31 (the 8th dword); v31 is reused as the destination.
ds_read_b32 v31, v31 offset:28
v_add_f32_e32 v23, v69, v23
v_add_f32_e32 v23, v70, v23
s_waitcnt lgkmcnt(0)
; Fold in the last three terms; v23 now holds the sum of all eight values read.
v_add_f32_e32 v23, v67, v23
v_add_f32_e32 v23, v68, v23
v_add_f32_e32 v23, v31, v23
BB7_90: ; %._crit_edge.i72
; in Loop: Header=BB7_11 Depth=1
; Re-enable the lanes that skipped BB7_89, then prepare the global-memory
; address and starting value for the compare-and-swap loop at BB7_91.
s_or_b64 exec, exec, s[8:9]
; Element index = v66 * 3 + v2.
; NOTE(review): with v2 < 3 (checked before BB7_88) this looks like a
; 3-component (x/y/z) float array indexed by v66 - confirm against the kernel source.
v_mul_lo_i32 v27, v66, 3
v_mov_b32_e32 v31, s29
; Buffer resource s[8:11] = { s[28:29], s[46:47] } for the load below.
s_mov_b64 s[8:9], s[28:29]
s_mov_b64 s[10:11], s[46:47]
v_add_i32_e32 v66, vcc, v27, v2
; Sign-extend the index to 64 bits and scale by 4 bytes: v[68:69] = byte offset.
v_ashrrev_i32_e32 v67, 31, v66
v_lshl_b64 v[68:69], v[66:67], 2
; v[66:67] = s[28:29] + v[68:69] (64-bit add with carry); used as the address
; operand of the atomic in BB7_91.
v_add_i32_e32 v66, vcc, s28, v68
v_addc_u32_e32 v67, vcc, v69, v31, vcc
; v69 = current memory value (the first "expected" value for the CAS loop).
; The destination overwrites the high half of the offset pair it was addressed with.
buffer_load_dword v69, v[68:69], s[8:11], 0 addr64
; s[8:9] is reused as the "lanes finished" mask of the loop; start empty.
s_mov_b64 s[8:9], 0
s_waitcnt vmcnt(0)
BB7_91: ; Parent Loop BB7_11 Depth=1
; => This Inner Loop Header: Depth=2
; Compare-and-swap retry loop that adds the float in v23 to the dword addressed
; by v[66:67]. v69 holds the value the lane expects to find in memory.
; desired = v23 + expected
v_add_f32_e32 v68, v23, v69
; cmpswap operand pair: v70 = value to store, v71 = value to compare against.
v_mov_b32_e32 v71, v69
v_mov_b32_e32 v70, v68
; glc: the pre-operation memory value is returned in v70.
; NOTE(review): v[66:67] already contains s[28:29] + offset, so s[44:47]
; presumably describes a zero-based buffer - its setup is outside this view.
buffer_atomic_cmpswap v[70:71], v[66:67], s[44:47], 0 addr64 glc
; NOTE(review): v27 is written twice in a row (the -1 is immediately replaced by
; 0xf000) and is not read anywhere in this block; looks like compiler residue -
; confirm against later readers before touching.
v_mov_b32_e32 v27, -1
v_mov_b32_e32 v27, 0xf000
s_waitcnt vmcnt(0) expcnt(0)
; The swap succeeded for a lane when the returned value equals what it expected.
v_cmp_eq_u32_e32 vcc, v70, v69
; Accumulate finished lanes in s[8:9].
s_or_b64 s[8:9], vcc, s[8:9]
; Lanes that retry use the value just observed as their new expectation.
v_mov_b32_e32 v69, v70
; Remove finished lanes from exec; loop while any lane remains.
s_andn2_b64 exec, exec, s[8:9]
s_cbranch_execnz BB7_91
; BB#92: ; %Flow1233
; in Loop: Header=BB7_11 Depth=1
; Loop exit: re-enable every lane that completed the loop.
s_or_b64 exec, exec, s[8:9]
BB7_93: ; %Flow1234
; in Loop: Header=BB7_11 Depth=1
; Re-enable the lanes excluded by the v2 < 3 filter (saved in s[4:5] before BB7_88).
s_or_b64 exec, exec, s[4:5]
BB7_94: ; %Flow1243
; in Loop: Header=BB7_11 Depth=1
; Join for the region guarded by s[30:31], then start of the next guarded
; region: it is entered only by lanes whose v65 has any of bits 16..23 set.
s_or_b64 exec, exec, s[30:31]
v_and_b32_e32 v23, 0xff0000, v65
v_cmp_ne_u32_e32 vcc, 0, v23
; exec &= vcc; s[30:31] = lanes switched off (to be restored at the matching join).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[30:31], exec, s[4:5]
; mask branch BB7_134
s_cbranch_execz BB7_134
BB7_95: ; in Loop: Header=BB7_11 Depth=1
s_mov_b32 m0, -1
; v23 = 32-bit index read from LDS at v54 + 8 bytes.
ds_read_b32 v23, v54 offset:8
; Buffer resource s[8:11] = { s[32:33], s[46:47] } for the 16-byte load.
s_mov_b64 s[8:9], s[32:33]
s_mov_b64 s[10:11], s[46:47]
s_waitcnt lgkmcnt(0)
; Element index v66 = v23 * 8 + v1, sign-extended to 64 bits.
v_lshlrev_b32_e32 v27, 3, v23
v_add_i32_e32 v66, vcc, v27, v1
v_ashrrev_i32_e32 v67, 31, v66
; v[70:73] = 16-byte element at index v66 from the buffer based at s[32:33].
; NOTE(review): the users treat v70..v72 as a 3-vector and v73 as a scalar
; factor - presumably position + charge; confirm against the kernel source.
v_lshl_b64 v[68:69], v[66:67], 4
buffer_load_dwordx4 v[70:73], v[68:69], s[8:11], 0 addr64
; v[67:68] = 8-byte element at the same index from the buffer based at s[36:37].
v_lshl_b64 v[74:75], v[66:67], 3
s_mov_b64 s[8:9], s[36:37]
buffer_load_dwordx2 v[67:68], v[74:75], s[8:11], 0 addr64
; Test bit 16 of v65; v69, v75 and v78 are zeroed before the guarded work.
v_lshrrev_b32_e32 v27, 16, v65
v_mov_b32_e32 v69, 0
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
v_mov_b32_e32 v75, v69
v_mov_b32_e32 v78, v69
; exec &= vcc; s[34:35] = lanes switched off (restored at BB7_99).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; Both global loads must have landed before the guarded code uses them.
s_waitcnt vmcnt(0)
; mask branch BB7_99
s_cbranch_execz BB7_99
BB7_96: ; in Loop: Header=BB7_11 Depth=1
; Builds the difference vector between the 16-byte LDS element at v55 and the
; global element in v[70:73], its squared length, and a threshold test.
s_mov_b32 m0, -1
; v[79:82] = two consecutive qwords (16 bytes) at LDS address v55.
ds_read2_b64 v[79:82], v55 offset1:1
; Lane predicate s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v41 != v23),
; assembled over the scalar instructions interleaved below.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v41, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; v_subrev_f32 d, a, b computes b - a: v27 = v80 - v71.
v_subrev_f32_e32 v27, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v31 = v79 - v70.
v_subrev_f32_e32 v31, v70, v79
v_mul_f32_e32 v45, v27, v27
; v51 = 1.0 where the predicate holds, else 0.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72.
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v31, v31
v_mov_b32_e32 v69, 0
; v45 = v27^2 + v31^2 + v35^2 (squared length of the difference vector).
v_mac_f32_e32 v45, v35, v35
; Threshold = s26 where the predicate holds, 0 otherwise; a sum of squares is
; never < 0, so lanes failing the predicate cannot pass the compare.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; v69, v75, v78 are cleared again; lanes that skip BB7_97 keep these zeros.
v_mov_b32_e32 v75, v69
v_mov_b32_e32 v78, v69
; exec &= vcc; s[38:39] = lanes switched off (restored at BB7_98).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[38:39], exec, s[4:5]
; mask branch BB7_98
s_cbranch_execz BB7_98
BB7_97: ; in Loop: Header=BB7_11 Depth=1
; Straight-line arithmetic for the lanes that passed the v45 < v51 test in BB7_96.
; Inputs visible here: v45 (squared length), v27/v31/v35 (difference vector),
; v[67:68] and v73 (global loads), v82 (4th LDS dword), v19 (bit mask), v9, v37,
; s18, s19, s27, s42, s43, s50..s52.
; Visible results: v8 and v5 are accumulated into; v48/v49/v50 are accumulated
; into; v69/v75/v78 are overwritten at the end of the block.
; NOTE(review): the branch thresholds (0.84375, 1.25, ~2.857, 6.0, 2^-28), the
; constant 0x3f58560b and the coefficient tables match the FreeBSD msun
; single-precision erff, and the 0x3fb8aa3b / 0xbf317180 sequences look like an
; inlined expf - presumably an erfc-based electrostatics term; confirm against
; the kernel source before relying on this.
; v76 = 1.0 if bit 16 of v19 is set, else 0 (vcc is consumed by the cndmask two
; instructions after v69 is reused for the reciprocal square root).
v_lshrrev_b32_e32 v69, 16, v19
v_and_b32_e32 v69, 1, v69
; Clamp v45 from below with 0x34cd15ae before taking 1/sqrt.
v_max_f32_e32 v45, 0x34cd15ae, v45
v_cmp_eq_u32_e32 vcc, 1, v69
; v69 = 1 / sqrt(v45)
v_rsq_f32_e32 v69, v45
v_cndmask_b32_e64 v76, 0, 1.0, vcc
s_mov_b32 m0, -1
; vcc = (s27 > v45); turned into a 0/1 float in v75 further down.
v_cmp_gt_f32_e32 vcc, s27, v45
; v77 = v69^2; v79 = v76 * v77^3.
v_mul_f32_e32 v77, v69, v69
v_mul_f32_e32 v74, v77, v77
v_mul_f32_e32 v78, v76, v74
; v[74:75] = 8 bytes from LDS at v56.
ds_read_b64 v[74:75], v56
v_mul_f32_e32 v79, v77, v78
v_mad_f32 v78, v78, v77, s42
v_mad_f32 v81, v79, v79, s43
; 0xbe2aaaab is approximately -1/6; 0x3daaaaaa (below) is approximately 1/12.
v_mul_f32_e32 v78, 0xbe2aaaab, v78
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v75, v68, v75
v_mul_f32_e32 v80, v79, v75
v_mad_f32 v80, v74, v67, -v80
v_mul_f32_e32 v74, v67, v74
v_mul_f32_e32 v75, v81, v75
v_mul_f32_e32 v74, v74, v78
v_mac_f32_e32 v74, 0x3daaaaaa, v75
v_cndmask_b32_e64 v75, 0, 1.0, vcc
v_mul_f32_e32 v78, v76, v75
; v8 += v74 * v76 * (s27 > v45 ? 1 : 0)
v_mac_f32_e32 v8, v74, v78
; v74 = v9 * v45; v78 = v74^2 feeds the two short polynomials in v81 / v82 / v83.
v_mul_f32_e32 v74, v9, v45
v_mul_f32_e32 v78, v74, v74
v_mov_b32_e32 v81, 0x3a92b707
v_madak_f32_e32 v81, v81, v78, 0x3ded3cb2
; v51 = v73 * v82: product of the 4th dword of the global element and the 4th
; dword of the LDS element read in BB7_96.
v_mul_f32_e32 v51, v73, v82
v_mov_b32_e32 v82, 0x3c739487
v_madak_f32_e32 v82, v82, v78, 0x3f01e2bc
v_mad_f32 v81, v81, v78, 1.0
v_mac_f32_e32 v81, v74, v82
v_mov_b32_e32 v82, 0xb2951928
v_madak_f32_e32 v82, v82, v78, 0xb85ffb93
v_mov_b32_e32 v83, 0x35c55945
; v45 = s18 * v45 * v69 (completed two instructions below).
v_mul_f32_e32 v45, s18, v45
v_madak_f32_e32 v83, v83, v78, 0x3a83ca0c
v_mul_f32_e32 v45, v69, v45
v_madak_f32_e32 v82, v82, v78, 0xbc9ded90
v_madak_f32_e32 v83, v83, v78, 0x3d8eaf3b
v_madak_f32_e32 v78, v82, v78, 0xbf409397
; v82 = v45 & s50. NOTE(review): s50 is presumably the 0x7fffffff abs mask - set outside this view.
v_and_b32_e32 v82, s50, v45
; s[4:5] = (1.25 > v82)
v_mov_b32_e32 v84, 0x3fa00000
v_cmp_gt_f32_e64 s[4:5], v84, v82
v_mul_f32_e32 v84, v82, v82
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v82
v_mov_b32_e32 v87, 0xbd777f97
v_and_b32_e32 v89, s51, v45
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
; s[8:9] = (0.84375 > v82)
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v82
; v84 = polynomial argument: v82^2, v82 - 1, or 1 / v82^2 depending on the range.
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
; vcc = (0x4036db6e > v82), about 2.857.
v_cmp_gt_f32_e32 vcc, v85, v82
; Numerator candidates are built in v85/v86/v87 and selected into v85;
; denominator candidates are built in v86/v87 and selected into v86.
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb7c756b1
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_madak_f32_e32 v87, v84, v87, 0x3e0375d4
v_cndmask_b32_e64 v85, v85, v87, s[8:9]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Denominator v84 = 1 + v84 * v86. Before each v_rcp_f32 in this block the
; operand is multiplied by 2^-32 (0x2f800000) when its magnitude exceeds 2^96
; (0x6f800000), and the quotient is multiplied by the same factor afterwards.
v_mad_f32 v84, v84, v86, 1.0
v_mov_b32_e32 v86, 0x6f800000
v_cmp_gt_f32_e64 vcc, |v84|, v86
v_mov_b32_e32 v87, 0x2f800000
v_cndmask_b32_e32 v88, 1.0, v87, vcc
v_mul_f32_e32 v84, v88, v84
v_mov_b32_e32 v90, 0xbf100000
v_rcp_f32_e32 v84, v84
; v90 = -(v89 * v89) - 0.5625, with v89 = v45 & s51.
v_mad_f32 v90, v89, -v89, v90
; First exponential-style sequence on v90: round(v90 * 0x3fb8aa3b) to integer
; in v91 (with a +/-0.5 bias by sign), reduce with 0xbf317180 / 0xb717f7d1,
; evaluate a polynomial, then add the integer shifted left by 23 into the
; float bit pattern.
v_cmp_gt_f32_e32 vcc, 0, v90
v_cndmask_b32_e64 v91, 0.5, -0.5, vcc
v_mov_b32_e32 v92, 0x3fb8aa3b
v_mac_f32_e32 v91, v92, v90
v_mul_f32_e32 v84, v84, v85
v_cvt_i32_f32_e32 v91, v91
v_subrev_f32_e32 v105, v82, v89
; v93 = numerator / denominator of the selected rational approximation.
v_mul_f32_e32 v93, v84, v88
v_add_f32_e32 v89, v82, v89
; v89 = (v82 + v89) * (v89 - v82) + v93: argument of the second exponential-style sequence.
v_mad_f32 v89, v89, v105, v93
v_cmp_gt_f32_e64 s[10:11], 0, v89
v_cndmask_b32_e64 v105, 0.5, -0.5, s[10:11]
v_cvt_f32_i32_e32 v85, v91
v_mac_f32_e32 v105, v92, v89
v_cvt_i32_f32_e32 v92, v105
v_mov_b32_e32 v94, 0xbf317180
v_mad_f32 v95, v94, v85, v90
v_mov_b32_e32 v96, 0xb717f7d1
v_mad_f32 v97, v96, v85, v95
v_mul_f32_e32 v98, v97, v97
v_mov_b32_e32 v99, 0xb5ddea0e
v_mov_b32_e32 v100, 0x3331bb4c
v_cvt_f32_i32_e32 v106, v92
v_mad_f32 v101, v100, v98, v99
v_mov_b32_e32 v102, 0x388ab355
v_mad_f32 v101, v101, v98, v102
v_mov_b32_e32 v103, 0xbb360b61
v_mad_f32 v101, v101, v98, v103
v_mov_b32_e32 v104, 0x3e2aaaab
v_mad_f32 v101, v101, v98, v104
v_mac_f32_e32 v78, v74, v83
v_mad_f32 v83, v94, v106, v89
v_mad_f32 v98, -v98, v101, v97
v_mad_f32 v94, v96, v106, v83
v_mul_f32_e32 v74, v98, v97
v_mul_f32_e32 v97, v94, v94
v_mac_f32_e32 v99, v100, v97
v_sub_f32_e32 v101, 2.0, v98
v_mac_f32_e32 v102, v99, v97
v_cmp_gt_f32_e64 vcc, |v101|, v86
v_mac_f32_e32 v103, v102, v97
v_mac_f32_e32 v104, v103, v97
v_cndmask_b32_e32 v105, 1.0, v87, vcc
v_mul_f32_e32 v101, v105, v101
v_mad_f32 v97, -v97, v104, v94
v_rcp_f32_e32 v101, v101
v_sub_f32_e32 v98, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v98|, v86
; Reciprocal of the denominator polynomial built earlier in v81.
v_rcp_f32_e32 v81, v81
v_cndmask_b32_e32 v99, 1.0, v87, vcc
v_mul_f32_e32 v98, v99, v98
v_mul_f32_e32 v74, v101, v74
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v74, v74, v105
v_mad_f32 v74, -v85, v96, -v74
v_mul_f32_e32 v81, v37, v81
v_mul_f32_e32 v85, v97, v94
v_subrev_f32_e32 v74, v95, v74
; v78 = v78 * (v37 / v81) + v69 * (v76 * v77), completed three instructions below.
v_mul_f32_e32 v78, v78, v81
v_mul_f32_e32 v81, v76, v77
v_mul_f32_e32 v85, v98, v85
v_mac_f32_e32 v78, v69, v81
v_lshlrev_b32_e32 v81, 23, v91
v_sub_f32_e32 v74, 1.0, v74
v_mul_f32_e32 v85, v85, v99
; Integer add of (n << 23) into the float bit pattern in v74.
v_add_i32_e32 v74, vcc, v74, v81
; Range handling for the first sequence: result 0 when v90 < 0xc2aeac4f
; (about -87.34), +inf (0x7f800000) when v90 >= 0x42b17218 (about 88.72),
; and v90 itself when v90 is NaN.
v_mov_b32_e32 v81, 0xc2aeac4f
v_mad_f32 v85, -v106, v96, -v85
v_cmp_ge_f32_e32 vcc, v90, v81
v_mov_b32_e32 v91, 0x42b17218
v_subrev_f32_e32 v83, v83, v85
v_cndmask_b32_e32 v74, 0, v74, vcc
v_cmp_lt_f32_e32 vcc, v90, v91
v_mov_b32_e32 v94, 0x7f800000
v_cndmask_b32_e32 v74, v94, v74, vcc
v_cmp_u_f32_e32 vcc, v90, v90
v_sub_f32_e32 v83, 1.0, v83
v_lshlrev_b32_e32 v85, 23, v92
v_cndmask_b32_e32 v74, v74, v90, vcc
v_add_i32_e32 v83, vcc, v83, v85
; Same range handling for the second sequence (argument v89).
v_cmp_ge_f32_e32 vcc, v89, v81
v_cndmask_b32_e32 v81, 0, v83, vcc
v_cmp_lt_f32_e32 vcc, v89, v91
v_cndmask_b32_e32 v81, v94, v81, vcc
v_cmp_u_f32_e32 vcc, v89, v89
v_cndmask_b32_e32 v81, v81, v89, vcc
; v74 = product of the two exponential-style results.
v_mul_f32_e32 v74, v81, v74
; s[10:11] = (6.0 > v82); s[12:13] = (2^-28 > v82).
v_mov_b32_e32 v81, 0x40c00000
v_cmp_gt_f32_e64 s[10:11], v81, v82
v_mov_b32_e32 v81, 0x31800000
v_cmp_gt_f32_e64 vcc, |v82|, v86
v_cmp_gt_f32_e64 s[12:13], v81, v82
v_cndmask_b32_e32 v81, 1.0, v87, vcc
v_mul_f32_e32 v82, v81, v82
v_rcp_f32_e32 v82, v82
; vcc = v45 is NaN; consumed by the final select below.
v_cmp_u_f32_e32 vcc, v45, v45
v_mul_f32_e32 v74, v82, v74
; v74 = 1 - (product / v82)
v_mad_f32 v74, -v81, v74, 1.0
v_madak_f32_e32 v81, v88, v84, 0x3f58560b
; Per-range selection of the final value: 1.0 when v82 >= 6.0, otherwise the
; value above; replaced by 0x3f58560b + v93 when v82 < 1.25; then OR-ed with
; (v45 & s52).
; NOTE(review): s52 is presumably the sign-bit mask - set outside this view.
v_cndmask_b32_e64 v74, 1.0, v74, s[10:11]
v_cndmask_b32_e64 v74, v74, v81, s[4:5]
v_and_b32_e32 v81, s52, v45
v_or_b32_e32 v74, v81, v74
; v82 < 0.84375: v45 + v45 * v93.
v_mad_f32 v81, v93, v45, v45
v_cndmask_b32_e64 v74, v74, v81, s[8:9]
; v82 < 2^-28: 0.125 * (8 * v45 + 0x3f8375d4 * v45).
v_mul_f32_e32 v81, 0x3f8375d4, v45
v_mac_f32_e32 v81, 0x41000000, v45
v_mul_f32_e32 v81, 0x3e000000, v81
v_cndmask_b32_e64 v74, v74, v81, s[12:13]
; NaN input propagates unchanged.
v_cndmask_b32_e32 v45, v74, v45, vcc
; v45 = v69 * (v76 - v45) - s19 * v76
v_subrev_f32_e32 v45, v45, v76
v_mul_f32_e32 v74, s19, v76
v_mad_f32 v45, v69, v45, -v74
v_mul_f32_e32 v69, v75, v77
v_mul_f32_e32 v69, v79, v69
; v5 += v45 * v51
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v78 - v80 * v69, and v51 is then set to the negation of that.
v_mul_f32_e32 v45, v80, v69
v_mad_f32 v45, v51, v78, -v45
v_mul_f32_e32 v51, v78, v51
v_mad_f32 v51, v69, v80, -v51
; v50/v49/v48 += v51 * -(v35, v27, v31)
v_mad_f32 v50, v51, -v35, v50
v_mad_f32 v49, v51, -v27, v49
v_mad_f32 v48, v51, -v31, v48
; v78/v75/v69 = v45 * -(v35, v27, v31); these registers were zeroed in BB7_96,
; so this is the first contribution for the lanes active here.
v_mul_f32_e64 v78, v45, -v35
v_mul_f32_e64 v75, v45, -v27
v_mul_f32_e64 v69, v45, -v31
BB7_98: ; %Flow1231
; in Loop: Header=BB7_11 Depth=1
; Join: re-enable lanes that failed the threshold test in BB7_96 (s[38:39]),
; then (BB7_99) lanes whose bit 16 of v65 was clear (s[34:35]).
s_or_b64 exec, exec, s[38:39]
BB7_99: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[34:35]
; Next guard: vcc = bit 17 of v65 is set.
v_lshrrev_b32_e32 v27, 17, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc; s[12:13] = lanes switched off (restored at BB7_103).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_103
s_cbranch_execz BB7_103
BB7_100: ; in Loop: Header=BB7_11 Depth=1
; Same shape as BB7_96, for the LDS element 16 qwords (128 bytes) past v55 and
; with v64 as the index compared against v23.
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: v[79:82] = 16 bytes at v55 + 128.
ds_read2_b64 v[79:82], v55 offset0:16 offset1:17
; Lane predicate s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v64 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v64, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; Difference vector (v_subrev_f32 d, a, b computes b - a):
; v27 = v79 - v70, v31 = v80 - v71, v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the predicate holds, else 0.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
; v45 = v27^2 + v31^2 + v35^2
v_mac_f32_e32 v45, v35, v35
; Threshold = s26 where the predicate holds, 0 otherwise.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; exec &= vcc; s[34:35] = lanes switched off (restored at BB7_102).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_102
s_cbranch_execz BB7_102
BB7_101: ; in Loop: Header=BB7_11 Depth=1
; Straight-line arithmetic for the lanes that passed the v45 < v51 test in BB7_100.
; Inputs visible here: v45 (squared length), v27/v31/v35 (difference vector),
; v[67:68] and v73 (global loads from BB7_95), v82 (4th LDS dword), v19 (bit
; mask), v9, v37, s18, s19, s27, s42, s43, s50..s52.
; Visible results: v8 and v5 are accumulated into; v69/v75/v78 and v42/v43/v44
; are accumulated into at the end of the block.
; NOTE(review): the branch thresholds (0.84375, 1.25, ~2.857, 6.0, 2^-28), the
; constant 0x3f58560b and the coefficient tables match the FreeBSD msun
; single-precision erff, and the 0x3fb8aa3b / 0xbf317180 sequences look like an
; inlined expf - presumably an erfc-based electrostatics term; confirm against
; the kernel source before relying on this.
; Clamp v45 from below with 0x34cd15ae, then v79 = 1 / sqrt(v45).
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v74, 17, v19
; v[76:77] = 8 bytes from LDS at v56 + 64.
ds_read_b64 v[76:77], v56 offset:64
; v74 = 1.0 if bit 17 of v19 is set, else 0.
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2; v82 (below) = v74 * v80^3.
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82: product of the 4th dword of the global element and the 4th
; dword of the LDS element read in BB7_100 (v82 is reused right after).
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
; 0xbe2aaaab is approximately -1/6; 0x3daaaaaa (below) is approximately 1/12.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
; vcc = (s27 > v45), turned into a 0/1 float in v77.
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
; v8 += v76 * v74 * (s27 > v45 ? 1 : 0)
v_mac_f32_e32 v8, v76, v81
; v76 = v9 * v45; v81 = v76^2 feeds the two short polynomials in v84 / v85 / v86.
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
; v45 = s18 * v45 * v79
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
; v76 = v81 * (v37 / v84) + v79 * (v74 * v80), completed four instructions below.
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; v81 = v45 & s50. NOTE(review): s50 is presumably the 0x7fffffff abs mask - set outside this view.
v_and_b32_e32 v81, s50, v45
; s[4:5] = (1.25 > v81)
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
; s[8:9] = (0.84375 > v81)
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
; v84 = polynomial argument: v81^2, v81 - 1, or 1 / v81^2 depending on the range.
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
; vcc = (0x4036db6e > v81), about 2.857.
v_cmp_gt_f32_e32 vcc, v85, v81
; Numerator candidates are selected into v85; denominator candidates into v86.
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Denominator v86 = 1 + v84 * v86. Before each v_rcp_f32 in this block the
; operand is multiplied by 2^-32 (0x2f800000) when its magnitude exceeds 2^96
; (0x6f800000), and the quotient is multiplied by the same factor afterwards.
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
; v92 = -(v91 * v91) - 0.5625, with v91 = v45 & s51.
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; First exponential-style sequence on v92: round(v92 * 0x3fb8aa3b) to integer
; in v93 (with a +/-0.5 bias by sign), reduce with 0xbf317180 / 0xb717f7d1,
; evaluate a polynomial, then add the integer shifted left by 23 into the
; float bit pattern.
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
; Last numerator candidate (used when v81 < 0.84375), interleaved here by the scheduler.
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
; Integer add of (n << 23) into the float bit pattern in v86.
v_add_i32_e32 v86, vcc, v86, v87
; Range handling: result 0 when v92 < 0xc2aeac4f (about -87.34), +inf
; (0x7f800000) when v92 >= 0x42b17218 (about 88.72), v92 itself when NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
v_subrev_f32_e32 v92, v81, v91
; v97 = numerator / denominator of the selected rational approximation.
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
; v91 = (v81 + v91) * (v91 - v81) + v97: argument of the second exponential-style sequence.
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = 0x3f58560b + v97 (candidate used when v81 < 1.25).
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
; Same range handling for the second sequence (argument v91).
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
v_cmp_gt_f32_e64 vcc, |v81|, v88
; v85 = product of the two exponential-style results.
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; vcc = (6.0 > v81); s[10:11] = (2^-28 > v81).
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
; v81 = 1 - (product / v81)
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
; Per-range selection of the final value: 1.0 when v81 >= 6.0; replaced by
; 0x3f58560b + v97 when below 1.25; then OR-ed with (v45 & s52).
; NOTE(review): s52 is presumably the sign-bit mask - set outside this view.
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
; Below 0.84375: v45 + v45 * v97.
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
; Below 2^-28: 0.125 * (8 * v45 + 0x3f8375d4 * v45).
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
; NaN input propagates unchanged.
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; v45 = v79 * (v74 - v45) - s19 * v74
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
; v5 += v45 * v51
v_mac_f32_e32 v5, v45, v51
; v45 = v51 * v76 - v83 * v74, and v51 is then set to the negation of that.
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; v78/v75/v69 += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; v44/v43/v42 += (v35, v31, v27) * v45
v_mad_f32 v44, v35, v45, v44
v_mad_f32 v43, v31, v45, v43
v_mac_f32_e32 v42, v27, v45
BB7_102: ; %Flow1230
; in Loop: Header=BB7_11 Depth=1
; Join: re-enable lanes that failed the threshold test in BB7_100 (s[34:35]),
; then (BB7_103) lanes whose bit 17 of v65 was clear (s[12:13]).
s_or_b64 exec, exec, s[34:35]
BB7_103: ; in Loop: Header=BB7_11 Depth=1
s_or_b64 exec, exec, s[12:13]
; Next guard: vcc = bit 18 of v65 is set.
v_lshrrev_b32_e32 v27, 18, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc; s[12:13] = lanes switched off (restored at BB7_107).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_107
s_cbranch_execz BB7_107
BB7_104: ; in Loop: Header=BB7_11 Depth=1
; Same shape as BB7_96 / BB7_100, for the LDS element 32 qwords (256 bytes)
; past v55 and with v63 as the index compared against v23.
s_mov_b32 m0, -1
; ds_read2_b64 offsets are in 8-byte units: v[79:82] = 16 bytes at v55 + 256.
ds_read2_b64 v[79:82], v55 offset0:32 offset1:33
; Lane predicate s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v63 != v23).
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v63, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
s_waitcnt lgkmcnt(0)
; Difference vector (v_subrev_f32 d, a, b computes b - a):
; v27 = v79 - v70, v31 = v80 - v71, v35 = v81 - v72.
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the predicate holds, else 0.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
v_subrev_f32_e32 v35, v72, v81
v_mac_f32_e32 v45, v27, v27
; v45 = v27^2 + v31^2 + v35^2
v_mac_f32_e32 v45, v35, v35
; Threshold = s26 where the predicate holds, 0 otherwise.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; exec &= vcc; s[34:35] = lanes switched off (restored at BB7_106).
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_106
s_cbranch_execz BB7_106
BB7_105: ; in Loop: Header=BB7_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v74, 18, v19
ds_read_b64 v[76:77], v56 offset:128
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
v_mac_f32_e32 v8, v76, v81
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
v_mov_b32_e32 v84, 0x3fa00000
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
v_mov_b32_e32 v88, 0x6f800000
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
v_mov_b32_e32 v92, 0xbf100000
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
v_cmp_gt_f32_e32 vcc, 0, v92
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
v_mul_f32_e32 v84, v86, v84
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
v_add_i32_e32 v86, vcc, v86, v87
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
v_mad_f32 v81, -v86, v81, 1.0
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
v_mad_f32 v45, v79, v45, -v74
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v83, v74
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
v_mad_f32 v34, v35, v45, v34
v_mad_f32 v33, v31, v45, v33
v_mac_f32_e32 v32, v27, v45
BB7_106: ; %Flow1229
; in Loop: Header=BB7_11 Depth=1
; Control-flow join: OR the lane mask held in s[34:35] back into exec,
; re-enabling whichever lanes that mask records.
s_or_b64 exec, exec, s[34:35]
BB7_107: ; in Loop: Header=BB7_11 Depth=1
; Join point followed by a per-lane test of bit 19 of v65.
; NOTE(review): v65 is presumably a per-lane interaction bit mask - confirm
; against the kernel source.
; Re-enable the lanes recorded in s[12:13].
s_or_b64 exec, exec, s[12:13]
; v27 = (v65 >> 19) & 1
v_lshrrev_b32_e32 v27, 19, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc, with the previous exec saved in s[4:5]. The s_xor then leaves
; in s[12:13] the lanes that were active but failed the test; BB7_111 ORs
; them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_111
; Skip straight to BB7_111 when no lane remains active.
s_cbranch_execz BB7_111
BB7_108: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS, forms three differences against v70/v71/v72,
; sums their squares into v45, and restricts exec to lanes whose v45 is
; below a per-lane threshold.
; NOTE(review): v27/v31/v35 are presumably x/y/z separations and v45 a
; squared distance compared against a cut-off in s26 - confirm.
; M0 is written with -1 ahead of the DS read.
s_mov_b32 m0, -1
; v[79:82] <- the two 64-bit LDS slots selected by offset0:48 / offset1:49,
; addressed from v55.
ds_read2_b64 v[79:82], v55 offset0:48 offset1:49
; Build the lane mask
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v62 != v23)
; interleaved with the arithmetic below.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v62, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; The LDS data must have arrived before v79..v82 are read.
s_waitcnt lgkmcnt(0)
; v31 = v80 - v71
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the mask bit is set, 0 elsewhere.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold: s26 where the mask bit is set, 0 elsewhere.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Keep only lanes with v45 < threshold. s[34:35] receives the lanes that
; were active but failed; BB7_110 ORs them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_110
; Skip BB7_109 entirely when no lane passed.
s_cbranch_execz BB7_110
BB7_109: ; in Loop: Header=BB7_11 Depth=1
; Runs only on lanes that passed the v45 < threshold test at the end of
; BB7_108. From v45 (the sum of squares built there) it derives several
; per-lane terms and accumulates them into v8, v5, v78/v75/v69 and
; v30/v29/v28.
; NOTE(review): the overall shape looks like a non-bonded pair interaction
; (a power-law term plus an erf-based term) - confirm against the kernel
; source before relying on that reading.
;
; --- reciprocal powers of v45 and the first accumulated term ---
; Clamp v45 from below with 0x34cd15ae (~3.82e-7) before the reciprocal
; square root.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v79 = hardware reciprocal square root of v45.
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v74, 19, v19
; v[76:77] <- 64 bits from LDS at v56 + 192.
ds_read_b64 v[76:77], v56 offset:192
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2
v_mul_f32_e32 v80, v79, v79
; v74 = 1.0 if bit 19 of v19 is set, else 0. It is used as a 0/1 factor
; throughout the rest of the block.
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82, where v82 is still the fourth dword loaded by the
; ds_read2_b64 in BB7_108 (v82 is overwritten on the next line).
v_mul_f32_e32 v51, v73, v82
; v82 = v74 * v80^3
v_mul_f32_e32 v82, v80, v81
; v[76:77] must have arrived before use.
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
; v81 = v74 * v80^3 + s42
v_mad_f32 v81, v81, v80, s42
; v83 = v76 * v67 - v82 * v77
v_mad_f32 v83, v76, v67, -v83
; v84 = v82^2 + s43
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
; 0xbe2aaaab is approximately -1/6.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
; vcc = (s27 > v45)
v_cmp_gt_f32_e32 vcc, s27, v45
; 0x3daaaaaa is approximately 1/12.
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1.0 where s27 > v45, else 0; reused near the end of the block.
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
; v8 += v76 * (v74 * v77)
v_mac_f32_e32 v8, v76, v81
; --- ratio of two polynomials in v76 = v9 * v45 ---
; Each polynomial is split into two chains in v81 = v76^2 that are joined
; with a final multiply by v76. v_madak computes D = S0 * S1 + K.
; v84 becomes the divisor (passed to v_rcp), v81 the dividend.
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
; v45 = s18 * v45 * v79
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 0x3fa00000 = 1.25, the first range threshold used below.
v_mov_b32_e32 v84, 0x3fa00000
; v76 = dividend * v37 * (1 / divisor), then v76 += v79 * (v74 * v80).
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; --- piecewise function of the rescaled v45 ---
; v81 = v45 & s50. Range masks on v81:
;   s[4:5] = (1.25 > v81), s[8:9] = (0.84375 > v81), vcc = (0x4036db6e > v81).
; All candidate polynomials are evaluated unconditionally and v_cndmask
; picks one per lane.
; NOTE(review): s50/s51/s52 are set outside this block; s50 presumably
; clears the sign bit and s52 presumably isolates it - confirm. The
; thresholds and layout resemble a single-precision erf() expansion.
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; 0x6f800000 = 2^96, the magnitude limit used by the scaled reciprocals.
v_mov_b32_e32 v88, 0x6f800000
; Polynomial argument v84: v81^2 where v81 < 0.84375, v81 - 1 where
; v81 < 1.25, otherwise 1 / v81^2.
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Two candidate dividend chains for the 1 / v81^2 range; vcc selects
; between them into v85.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Two candidate divisor chains for the same range; vcc selects into v86.
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Dividend chain for the v81 < 1.25 range, selected by s[4:5].
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Divisor chain for the v81 < 1.25 range, selected by s[4:5].
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Divisor chain for the v81 < 0.84375 range, selected by s[8:9].
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Selected divisor = v84 * v86 + 1.0
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
; 0xbf100000 = -0.5625
v_mov_b32_e32 v92, 0xbf100000
; Scaled reciprocal: where |divisor| > 2^96 it is multiplied by 2^-32
; (0x2f800000) before v_rcp. v90 holds the per-lane scale (1.0 or 2^-32)
; and is applied again to the quotient further below.
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
; v92 = -(v91^2) - 0.5625
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; --- first exponential-style evaluation, argument v92, result v86 ---
; NOTE(review): presumably an inlined expf(): n = round(v92 * 0x3fb8aa3b)
; (constant ~1.4427), argument reduction with 0xbf317180 / 0xb717f7d1,
; a polynomial in the reduced argument, then n << 23 added to the result's
; bit pattern - confirm.
; The last dividend chain (for the v81 < 0.84375 range) is interleaved
; with it in v87 / v84.
v_cmp_gt_f32_e32 vcc, 0, v92
; Rounding bias: -0.5 where v92 < 0, otherwise +0.5.
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
; Apply the scale to the divisor, then take its reciprocal.
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
; Final dividend selection: the chain just finished where v81 < 0.84375,
; otherwise the v85 chosen above.
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
; v84 = dividend * (1 / scaled divisor)
v_mul_f32_e32 v84, v86, v84
; Reduced argument: v86 = v92 + v87 * 0xbf317180, v96 = v86 + v87 * 0xb717f7d1.
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
; The polynomial constants stay in v98..v103 because the second
; evaluation below reuses them.
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
; Another scaled reciprocal with the same 2^96 / 2^-32 guard.
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
; Integer add of n << 23 to the float's bit pattern.
v_add_i32_e32 v86, vcc, v86, v87
; Range handling: the result is 0 unless v92 >= 0xc2aeac4f, +infinity
; (0x7f800000) unless v92 < 0x42b17218, and v92 itself when v92 is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; --- second exponential-style evaluation, result v85 ---
; Argument: v91 = (v81 + v91) * (v91 - v81) + v97, where v97 is the
; quotient from above multiplied by its scale v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = v90 * quotient + 0x3f58560b, the candidate result for the
; v81 < 1.25 range.
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
; Same polynomial as before, accumulated in place into the constant
; registers v98 / v101 / v102 / v103, which are overwritten here.
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
; Same range handling as the first evaluation, now keyed on v91.
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; --- combine and select the final piecewise result ---
; v85 = product of the two exponential results, then divided by v81 using
; the same scaled-reciprocal scheme (scale kept in v86).
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; 0x40c00000 = 6.0; vcc = (6.0 > v81).
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
; 0x31800000 = 2^-28; s[10:11] = (2^-28 > v81).
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
; v81 = 1.0 - scale * (v85 / scaled v81)
v_mad_f32 v81, -v86, v81, 1.0
; Selection cascade: 1.0 where v81 >= 6; v84 where v81 < 1.25; the bits of
; (v45 & s52) are then OR-ed in; v45 + v45 * v97 where v81 < 0.84375;
; 0.125 * (8.0 * v45 + 0x3f8375d4 * v45) where v81 < 2^-28; and v45 itself
; when v45 is NaN.
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; --- final accumulation ---
; v45 = v74 - v45, with v74 still the 0/1 flag from bit 19 of v19.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
; v45 = v79 * v45 - s19 * flag
v_mad_f32 v45, v79, v45, -v74
; v74 = v82 * v77 * v80, where v77 is the 0/1 result of s27 > v45 computed
; earlier.
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
; v5 += v45 * v51
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v83, v74
; v45 = v51 * v76 - v83 * v74. v51 is then rebuilt as v74 * v83 - v76 * v51,
; algebraically the negation of v45.
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; v78 / v75 / v69 += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; v30 / v29 / v28 += (v35, v31, v27) * v45
v_mad_f32 v30, v35, v45, v30
v_mad_f32 v29, v31, v45, v29
v_mac_f32_e32 v28, v27, v45
BB7_110: ; %Flow1228
; in Loop: Header=BB7_11 Depth=1
; Rejoin after BB7_109: OR s[34:35] (the lanes masked off by the threshold
; test at the end of BB7_108) back into exec.
s_or_b64 exec, exec, s[34:35]
BB7_111: ; in Loop: Header=BB7_11 Depth=1
; Join point followed by a per-lane test of bit 20 of v65.
; NOTE(review): v65 is presumably a per-lane interaction bit mask - confirm
; against the kernel source.
; Re-enable the lanes recorded in s[12:13] by the bit test in BB7_107.
s_or_b64 exec, exec, s[12:13]
; v27 = (v65 >> 20) & 1
v_lshrrev_b32_e32 v27, 20, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc, with the previous exec saved in s[4:5]. The s_xor then leaves
; in s[12:13] the lanes that were active but failed the test; BB7_115 ORs
; them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_115
; Skip straight to BB7_115 when no lane remains active.
s_cbranch_execz BB7_115
BB7_112: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS, forms three differences against v70/v71/v72,
; sums their squares into v45, and restricts exec to lanes whose v45 is
; below a per-lane threshold.
; NOTE(review): v27/v31/v35 are presumably x/y/z separations and v45 a
; squared distance compared against a cut-off in s26 - confirm.
; M0 is written with -1 ahead of the DS read.
s_mov_b32 m0, -1
; v[79:82] <- the two 64-bit LDS slots selected by offset0:64 / offset1:65,
; addressed from v55.
ds_read2_b64 v[79:82], v55 offset0:64 offset1:65
; Build the lane mask
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v61 != v23)
; interleaved with the arithmetic below.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v61, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; The LDS data must have arrived before v79..v82 are read.
s_waitcnt lgkmcnt(0)
; v31 = v80 - v71
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the mask bit is set, 0 elsewhere.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold: s26 where the mask bit is set, 0 elsewhere.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Keep only lanes with v45 < threshold. s[34:35] receives the lanes that
; were active but failed; BB7_114 ORs them back into exec.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_114
; Skip BB7_113 entirely when no lane passed.
s_cbranch_execz BB7_114
BB7_113: ; in Loop: Header=BB7_11 Depth=1
; Runs only on lanes that passed the v45 < threshold test at the end of
; BB7_112. From v45 (the sum of squares built there) it derives several
; per-lane terms and accumulates them into v8, v5, v78/v75/v69 and
; v26/v25/v24.
; NOTE(review): the overall shape looks like a non-bonded pair interaction
; (a power-law term plus an erf-based term) - confirm against the kernel
; source before relying on that reading.
;
; --- reciprocal powers of v45 and the first accumulated term ---
; Clamp v45 from below with 0x34cd15ae (~3.82e-7) before the reciprocal
; square root.
v_max_f32_e32 v45, 0x34cd15ae, v45
; v79 = hardware reciprocal square root of v45.
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v74, 20, v19
; v[76:77] <- 64 bits from LDS at v56 + 256.
ds_read_b64 v[76:77], v56 offset:256
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
; v80 = v79^2
v_mul_f32_e32 v80, v79, v79
; v74 = 1.0 if bit 20 of v19 is set, else 0. It is used as a 0/1 factor
; throughout the rest of the block.
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
; v51 = v73 * v82, where v82 is still the fourth dword loaded by the
; ds_read2_b64 in BB7_112 (v82 is overwritten on the next line).
v_mul_f32_e32 v51, v73, v82
; v82 = v74 * v80^3
v_mul_f32_e32 v82, v80, v81
; v[76:77] must have arrived before use.
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
; v81 = v74 * v80^3 + s42
v_mad_f32 v81, v81, v80, s42
; v83 = v76 * v67 - v82 * v77
v_mad_f32 v83, v76, v67, -v83
; v84 = v82^2 + s43
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
; 0xbe2aaaab is approximately -1/6.
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
; vcc = (s27 > v45)
v_cmp_gt_f32_e32 vcc, s27, v45
; 0x3daaaaaa is approximately 1/12.
v_mac_f32_e32 v76, 0x3daaaaaa, v77
; v77 = 1.0 where s27 > v45, else 0; reused near the end of the block.
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_mul_f32_e32 v81, v74, v77
; v8 += v76 * (v74 * v77)
v_mac_f32_e32 v8, v76, v81
; --- ratio of two polynomials in v76 = v9 * v45 ---
; Each polynomial is split into two chains in v81 = v76^2 that are joined
; with a final multiply by v76. v_madak computes D = S0 * S1 + K.
; v84 becomes the divisor (passed to v_rcp), v81 the dividend.
v_mul_f32_e32 v76, v9, v45
v_mul_f32_e32 v81, v76, v76
v_mov_b32_e32 v84, 0x3a92b707
v_madak_f32_e32 v84, v84, v81, 0x3ded3cb2
v_mov_b32_e32 v85, 0x3c739487
v_madak_f32_e32 v85, v85, v81, 0x3f01e2bc
v_mad_f32 v84, v84, v81, 1.0
v_mac_f32_e32 v84, v76, v85
v_mov_b32_e32 v85, 0xb2951928
v_madak_f32_e32 v85, v85, v81, 0xb85ffb93
v_mov_b32_e32 v86, 0x35c55945
v_madak_f32_e32 v86, v86, v81, 0x3a83ca0c
v_madak_f32_e32 v85, v85, v81, 0xbc9ded90
v_madak_f32_e32 v86, v86, v81, 0x3d8eaf3b
v_madak_f32_e32 v81, v85, v81, 0xbf409397
v_mac_f32_e32 v81, v76, v86
v_rcp_f32_e32 v76, v84
; v45 = s18 * v45 * v79
v_mul_f32_e32 v45, s18, v45
v_mul_f32_e32 v45, v79, v45
; 0x3fa00000 = 1.25, the first range threshold used below.
v_mov_b32_e32 v84, 0x3fa00000
; v76 = dividend * v37 * (1 / divisor), then v76 += v79 * (v74 * v80).
v_mul_f32_e32 v76, v37, v76
v_mul_f32_e32 v76, v81, v76
v_mul_f32_e32 v81, v74, v80
v_mac_f32_e32 v76, v79, v81
; --- piecewise function of the rescaled v45 ---
; v81 = v45 & s50. Range masks on v81:
;   s[4:5] = (1.25 > v81), s[8:9] = (0.84375 > v81), vcc = (0x4036db6e > v81).
; All candidate polynomials are evaluated unconditionally and v_cndmask
; picks one per lane.
; NOTE(review): s50/s51/s52 are set outside this block; s50 presumably
; clears the sign bit and s52 presumably isolates it - confirm. The
; thresholds and layout resemble a single-precision erf() expansion.
v_and_b32_e32 v81, s50, v45
v_cmp_gt_f32_e64 s[4:5], v84, v81
v_mul_f32_e32 v84, v81, v81
v_rcp_f32_e32 v85, v84
v_add_f32_e32 v86, -1.0, v81
v_mov_b32_e32 v87, 0xbd777f97
; 0x6f800000 = 2^96, the magnitude limit used by the scaled reciprocals.
v_mov_b32_e32 v88, 0x6f800000
; Polynomial argument v84: v81^2 where v81 < 0.84375, v81 - 1 where
; v81 < 1.25, otherwise 1 / v81^2.
v_cndmask_b32_e64 v85, v85, v86, s[4:5]
v_mov_b32_e32 v86, 0x3f580000
v_cmp_gt_f32_e64 s[8:9], v86, v81
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
; Two candidate dividend chains for the 1 / v81^2 range; vcc selects
; between them into v85.
v_mov_b32_e32 v86, 0xc11d077e
v_mov_b32_e32 v85, 0x4036db6e
v_madak_f32_e32 v86, v86, v84, 0xc2a2932b
v_cmp_gt_f32_e32 vcc, v85, v81
v_mov_b32_e32 v85, 0xc3f1c275
v_madak_f32_e32 v85, v85, v84, 0xc480230b
v_madak_f32_e32 v86, v84, v86, 0xc3389ae7
v_madak_f32_e32 v85, v84, v85, 0xc41f6441
v_madak_f32_e32 v86, v84, v86, 0xc322658c
v_madak_f32_e32 v85, v84, v85, 0xc320a2ea
v_madak_f32_e32 v86, v84, v86, 0xc2798057
v_madak_f32_e32 v85, v84, v85, 0xc18e104b
v_madak_f32_e32 v86, v84, v86, 0xc128f022
v_madak_f32_e32 v85, v84, v85, 0xbf4c9dd4
v_madak_f32_e32 v86, v84, v86, 0xbf31a0b7
v_madak_f32_e32 v85, v84, v85, 0xbc21a092
v_madak_f32_e32 v86, v84, v86, 0xbc21a093
v_madak_f32_e32 v87, v87, v84, 0x40d23f7c
v_cndmask_b32_e32 v85, v85, v86, vcc
; Two candidate divisor chains for the same range; vcc selects into v86.
v_mov_b32_e32 v86, 0xc1b38712
v_madak_f32_e32 v86, v86, v84, 0x43ed43a7
v_madak_f32_e32 v87, v84, v87, 0x42d9451f
v_madak_f32_e32 v86, v84, v86, 0x451f90ce
v_madak_f32_e32 v87, v84, v87, 0x43d6810b
v_madak_f32_e32 v86, v84, v86, 0x4547fdbb
v_madak_f32_e32 v87, v84, v87, 0x442158c9
v_madak_f32_e32 v86, v84, v86, 0x44c01759
v_madak_f32_e32 v87, v84, v87, 0x43d9486f
v_madak_f32_e32 v86, v84, v86, 0x43a2e571
v_madak_f32_e32 v87, v84, v87, 0x4309a863
v_madak_f32_e32 v86, v84, v86, 0x41f2b459
v_madak_f32_e32 v87, v84, v87, 0x419d35ce
v_cndmask_b32_e32 v86, v86, v87, vcc
; Dividend chain for the v81 < 1.25 range, selected by s[4:5].
v_mov_b32_e32 v87, 0xbb0df9c0
v_madak_f32_e32 v87, v87, v84, 0x3d1151b3
v_madak_f32_e32 v87, v84, v87, 0xbde31cc2
v_madak_f32_e32 v87, v84, v87, 0x3ea2fe54
v_madak_f32_e32 v87, v84, v87, 0xbebe9208
v_madak_f32_e32 v87, v84, v87, 0x3ed46805
v_madak_f32_e32 v87, v84, v87, 0xbb1acdc6
v_cndmask_b32_e64 v85, v85, v87, s[4:5]
; Divisor chain for the v81 < 1.25 range, selected by s[4:5].
v_mov_b32_e32 v87, 0x3c445aa3
v_madak_f32_e32 v87, v87, v84, 0x3c5f6e13
v_madak_f32_e32 v87, v84, v87, 0x3e013307
v_madak_f32_e32 v87, v84, v87, 0x3d931ae7
v_madak_f32_e32 v87, v84, v87, 0x3f0a5785
v_madak_f32_e32 v87, v84, v87, 0x3dd9f331
v_cndmask_b32_e64 v86, v86, v87, s[4:5]
; Divisor chain for the v81 < 0.84375 range, selected by s[8:9].
v_mov_b32_e32 v87, 0xb684e21a
v_madak_f32_e32 v87, v87, v84, 0x390aee49
v_madak_f32_e32 v87, v84, v87, 0x3ba68116
v_madak_f32_e32 v87, v84, v87, 0x3d852a63
v_madak_f32_e32 v87, v84, v87, 0x3ecbbbce
v_cndmask_b32_e64 v86, v86, v87, s[8:9]
; Selected divisor = v84 * v86 + 1.0
v_mad_f32 v86, v84, v86, 1.0
v_and_b32_e32 v91, s51, v45
; 0xbf100000 = -0.5625
v_mov_b32_e32 v92, 0xbf100000
; Scaled reciprocal: where |divisor| > 2^96 it is multiplied by 2^-32
; (0x2f800000) before v_rcp. v90 holds the per-lane scale (1.0 or 2^-32)
; and is applied again to the quotient further below.
v_cmp_gt_f32_e64 vcc, |v86|, v88
v_mov_b32_e32 v89, 0x2f800000
; v92 = -(v91^2) - 0.5625
v_mad_f32 v92, v91, -v91, v92
v_cndmask_b32_e32 v90, 1.0, v89, vcc
; --- first exponential-style evaluation, argument v92, result v86 ---
; NOTE(review): presumably an inlined expf(): n = round(v92 * 0x3fb8aa3b)
; (constant ~1.4427), argument reduction with 0xbf317180 / 0xb717f7d1,
; a polynomial in the reduced argument, then n << 23 added to the result's
; bit pattern - confirm.
; The last dividend chain (for the v81 < 0.84375 range) is interleaved
; with it in v87 / v84.
v_cmp_gt_f32_e32 vcc, 0, v92
; Rounding bias: -0.5 where v92 < 0, otherwise +0.5.
v_cndmask_b32_e64 v93, 0.5, -0.5, vcc
v_mov_b32_e32 v94, 0x3fb8aa3b
v_mac_f32_e32 v93, v94, v92
v_mov_b32_e32 v87, 0xb7c756b1
v_cvt_i32_f32_e32 v93, v93
v_madak_f32_e32 v87, v87, v84, 0xbbbd1489
v_madak_f32_e32 v87, v84, v87, 0xbce9528f
v_madak_f32_e32 v87, v84, v87, 0xbea66beb
; Apply the scale to the divisor, then take its reciprocal.
v_mul_f32_e32 v86, v90, v86
v_madak_f32_e32 v84, v84, v87, 0x3e0375d4
v_rcp_f32_e32 v86, v86
v_cvt_f32_i32_e32 v87, v93
; Final dividend selection: the chain just finished where v81 < 0.84375,
; otherwise the v85 chosen above.
v_cndmask_b32_e64 v84, v85, v84, s[8:9]
v_mov_b32_e32 v85, 0xbf317180
; v84 = dividend * (1 / scaled divisor)
v_mul_f32_e32 v84, v86, v84
; Reduced argument: v86 = v92 + v87 * 0xbf317180, v96 = v86 + v87 * 0xb717f7d1.
v_mad_f32 v86, v85, v87, v92
v_mov_b32_e32 v95, 0xb717f7d1
v_mad_f32 v96, v95, v87, v86
v_mul_f32_e32 v97, v96, v96
; The polynomial constants stay in v98..v103 because the second
; evaluation below reuses them.
v_mov_b32_e32 v98, 0xb5ddea0e
v_mov_b32_e32 v99, 0x3331bb4c
v_mad_f32 v100, v99, v97, v98
v_mov_b32_e32 v101, 0x388ab355
v_mad_f32 v100, v100, v97, v101
v_mov_b32_e32 v102, 0xbb360b61
v_mad_f32 v100, v100, v97, v102
v_mov_b32_e32 v103, 0x3e2aaaab
v_mad_f32 v100, v100, v97, v103
v_mad_f32 v97, -v97, v100, v96
v_mul_f32_e32 v96, v97, v96
v_sub_f32_e32 v97, 2.0, v97
; Another scaled reciprocal with the same 2^96 / 2^-32 guard.
v_cmp_gt_f32_e64 vcc, |v97|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v97, v100, v97
v_rcp_f32_e32 v97, v97
v_mul_f32_e32 v96, v97, v96
v_mul_f32_e32 v96, v96, v100
v_mad_f32 v87, -v87, v95, -v96
v_subrev_f32_e32 v86, v86, v87
v_lshlrev_b32_e32 v87, 23, v93
v_sub_f32_e32 v86, 1.0, v86
; Integer add of n << 23 to the float's bit pattern.
v_add_i32_e32 v86, vcc, v86, v87
; Range handling: the result is 0 unless v92 >= 0xc2aeac4f, +infinity
; (0x7f800000) unless v92 < 0x42b17218, and v92 itself when v92 is NaN.
v_mov_b32_e32 v87, 0xc2aeac4f
v_cmp_ge_f32_e32 vcc, v92, v87
v_mov_b32_e32 v93, 0x42b17218
v_cndmask_b32_e32 v86, 0, v86, vcc
v_cmp_lt_f32_e32 vcc, v92, v93
v_mov_b32_e32 v96, 0x7f800000
v_cndmask_b32_e32 v86, v96, v86, vcc
v_cmp_u_f32_e32 vcc, v92, v92
v_cndmask_b32_e32 v86, v86, v92, vcc
; --- second exponential-style evaluation, result v85 ---
; Argument: v91 = (v81 + v91) * (v91 - v81) + v97, where v97 is the
; quotient from above multiplied by its scale v90.
v_subrev_f32_e32 v92, v81, v91
v_mul_f32_e32 v97, v84, v90
v_add_f32_e32 v91, v81, v91
v_mad_f32 v91, v91, v92, v97
v_cmp_gt_f32_e32 vcc, 0, v91
v_cndmask_b32_e64 v92, 0.5, -0.5, vcc
v_mac_f32_e32 v92, v94, v91
v_cvt_i32_f32_e32 v92, v92
; v84 = v90 * quotient + 0x3f58560b, the candidate result for the
; v81 < 1.25 range.
v_madak_f32_e32 v84, v90, v84, 0x3f58560b
v_cvt_f32_i32_e32 v94, v92
v_lshlrev_b32_e32 v92, 23, v92
v_mad_f32 v85, v85, v94, v91
v_mad_f32 v100, v95, v94, v85
v_mul_f32_e32 v104, v100, v100
; Same polynomial as before, accumulated in place into the constant
; registers v98 / v101 / v102 / v103, which are overwritten here.
v_mac_f32_e32 v98, v99, v104
v_mac_f32_e32 v101, v98, v104
v_mac_f32_e32 v102, v101, v104
v_mac_f32_e32 v103, v102, v104
v_mad_f32 v98, -v104, v103, v100
v_mul_f32_e32 v99, v98, v100
v_sub_f32_e32 v98, 2.0, v98
v_cmp_gt_f32_e64 vcc, |v98|, v88
v_cndmask_b32_e32 v100, 1.0, v89, vcc
v_mul_f32_e32 v98, v100, v98
v_rcp_f32_e32 v98, v98
v_mul_f32_e32 v98, v98, v99
v_mul_f32_e32 v98, v98, v100
v_mad_f32 v94, -v94, v95, -v98
v_subrev_f32_e32 v85, v85, v94
v_sub_f32_e32 v85, 1.0, v85
v_add_i32_e32 v85, vcc, v85, v92
; Same range handling as the first evaluation, now keyed on v91.
v_cmp_ge_f32_e32 vcc, v91, v87
v_cndmask_b32_e32 v85, 0, v85, vcc
v_cmp_lt_f32_e32 vcc, v91, v93
v_cndmask_b32_e32 v85, v96, v85, vcc
v_cmp_u_f32_e32 vcc, v91, v91
v_cndmask_b32_e32 v85, v85, v91, vcc
; --- combine and select the final piecewise result ---
; v85 = product of the two exponential results, then divided by v81 using
; the same scaled-reciprocal scheme (scale kept in v86).
v_cmp_gt_f32_e64 vcc, |v81|, v88
v_mul_f32_e32 v85, v85, v86
v_cndmask_b32_e32 v86, 1.0, v89, vcc
v_mul_f32_e32 v87, v86, v81
v_rcp_f32_e32 v87, v87
; 0x40c00000 = 6.0; vcc = (6.0 > v81).
v_mov_b32_e32 v88, 0x40c00000
v_cmp_gt_f32_e32 vcc, v88, v81
; 0x31800000 = 2^-28; s[10:11] = (2^-28 > v81).
v_mov_b32_e32 v88, 0x31800000
v_cmp_gt_f32_e64 s[10:11], v88, v81
v_mul_f32_e32 v81, v87, v85
; v81 = 1.0 - scale * (v85 / scaled v81)
v_mad_f32 v81, -v86, v81, 1.0
; Selection cascade: 1.0 where v81 >= 6; v84 where v81 < 1.25; the bits of
; (v45 & s52) are then OR-ed in; v45 + v45 * v97 where v81 < 0.84375;
; 0.125 * (8.0 * v45 + 0x3f8375d4 * v45) where v81 < 2^-28; and v45 itself
; when v45 is NaN.
v_cndmask_b32_e32 v81, 1.0, v81, vcc
v_cndmask_b32_e64 v81, v81, v84, s[4:5]
v_and_b32_e32 v84, s52, v45
v_or_b32_e32 v81, v84, v81
v_mad_f32 v84, v97, v45, v45
v_cndmask_b32_e64 v81, v81, v84, s[8:9]
v_mul_f32_e32 v84, 0x3f8375d4, v45
v_mac_f32_e32 v84, 0x41000000, v45
v_mul_f32_e32 v84, 0x3e000000, v84
v_cndmask_b32_e64 v81, v81, v84, s[10:11]
v_cmp_u_f32_e32 vcc, v45, v45
v_cndmask_b32_e32 v45, v81, v45, vcc
; --- final accumulation ---
; v45 = v74 - v45, with v74 still the 0/1 flag from bit 20 of v19.
v_subrev_f32_e32 v45, v45, v74
v_mul_f32_e32 v74, s19, v74
; v45 = v79 * v45 - s19 * flag
v_mad_f32 v45, v79, v45, -v74
; v74 = v82 * v77 * v80, where v77 is the 0/1 result of s27 > v45 computed
; earlier.
v_mul_f32_e32 v74, v77, v80
v_mul_f32_e32 v74, v82, v74
; v5 += v45 * v51
v_mac_f32_e32 v5, v45, v51
v_mul_f32_e32 v45, v83, v74
; v45 = v51 * v76 - v83 * v74. v51 is then rebuilt as v74 * v83 - v76 * v51,
; algebraically the negation of v45.
v_mad_f32 v45, v51, v76, -v45
v_mul_f32_e32 v51, v76, v51
v_mad_f32 v51, v74, v83, -v51
; v78 / v75 / v69 += (v35, v31, v27) * v51
v_mad_f32 v78, v35, v51, v78
v_mad_f32 v75, v31, v51, v75
v_mac_f32_e32 v69, v27, v51
; v26 / v25 / v24 += (v35, v31, v27) * v45
v_mad_f32 v26, v35, v45, v26
v_mad_f32 v25, v31, v45, v25
v_mac_f32_e32 v24, v27, v45
BB7_114: ; %Flow1227
; in Loop: Header=BB7_11 Depth=1
; Rejoin after BB7_113: OR s[34:35] (the lanes masked off by the threshold
; test at the end of BB7_112) back into exec.
s_or_b64 exec, exec, s[34:35]
BB7_115: ; in Loop: Header=BB7_11 Depth=1
; Join point followed by a per-lane test of bit 21 of v65.
; NOTE(review): v65 is presumably a per-lane interaction bit mask - confirm
; against the kernel source.
; Re-enable the lanes recorded in s[12:13] by the bit test in BB7_111.
s_or_b64 exec, exec, s[12:13]
; v27 = (v65 >> 21) & 1
v_lshrrev_b32_e32 v27, 21, v65
v_and_b32_e32 v27, 1, v27
v_cmp_eq_u32_e32 vcc, 1, v27
; exec &= vcc, with the previous exec saved in s[4:5]. The s_xor then leaves
; in s[12:13] the lanes that were active but failed the test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[12:13], exec, s[4:5]
; mask branch BB7_119
; Skip straight to BB7_119 when no lane remains active.
s_cbranch_execz BB7_119
BB7_116: ; in Loop: Header=BB7_11 Depth=1
; Loads four dwords from LDS, forms three differences against v70/v71/v72,
; sums their squares into v45, and restricts exec to lanes whose v45 is
; below a per-lane threshold.
; NOTE(review): v27/v31/v35 are presumably x/y/z separations and v45 a
; squared distance compared against a cut-off in s26 - confirm.
; M0 is written with -1 ahead of the DS read.
s_mov_b32 m0, -1
; v[79:82] <- the two 64-bit LDS slots selected by offset0:80 / offset1:81,
; addressed from v55.
ds_read2_b64 v[79:82], v55 offset0:80 offset1:81
; Build the lane mask
;   s[4:5] = (exec & s[2:3]) | ~(exec & s[0:1]) | (v60 != v23)
; interleaved with the arithmetic below.
s_and_b64 s[4:5], exec, s[0:1]
v_cmp_ne_u32_e32 vcc, v60, v23
s_xor_b64 s[4:5], s[4:5], -1
s_or_b64 s[4:5], s[4:5], vcc
; The LDS data must have arrived before v79..v82 are read.
s_waitcnt lgkmcnt(0)
; v31 = v80 - v71
v_subrev_f32_e32 v31, v71, v80
s_and_b64 s[8:9], exec, s[2:3]
s_or_b64 s[4:5], s[8:9], s[4:5]
; v27 = v79 - v70
v_subrev_f32_e32 v27, v70, v79
v_mul_f32_e32 v45, v31, v31
; v51 = 1.0 where the mask bit is set, 0 elsewhere.
v_cndmask_b32_e64 v51, 0, 1.0, s[4:5]
; v35 = v81 - v72
v_subrev_f32_e32 v35, v72, v81
; v45 = v31^2 + v27^2 + v35^2
v_mac_f32_e32 v45, v27, v27
v_mac_f32_e32 v45, v35, v35
; Threshold: s26 where the mask bit is set, 0 elsewhere.
v_mul_f32_e32 v51, s26, v51
v_cmp_lt_f32_e32 vcc, v45, v51
; Keep only lanes with v45 < threshold. s[34:35] receives the lanes that
; were active but failed the test.
s_and_saveexec_b64 s[4:5], vcc
s_xor_b64 s[34:35], exec, s[4:5]
; mask branch BB7_118
; Skip to BB7_118 when no lane passed.
s_cbranch_execz BB7_118
BB7_117: ; in Loop: Header=BB7_11 Depth=1
v_max_f32_e32 v45, 0x34cd15ae, v45
v_rsq_f32_e32 v79, v45
s_mov_b32 m0, -1
v_lshrrev_b32_e32 v74, 21, v19
ds_read_b64 v[76:77], v56 offset:320
v_and_b32_e32 v74, 1, v74
v_cmp_eq_u32_e32 vcc, 1, v74
v_mul_f32_e32 v80, v79, v79
v_cndmask_b32_e64 v74, 0, 1.0, vcc
v_mul_f32_e32 v81, v80, v80
v_mul_f32_e32 v81, v74, v81
v_mul_f32_e32 v51, v73, v82
v_mul_f32_e32 v82, v80, v81
s_waitcnt lgkmcnt(0)
v_mul_f32_e32 v77, v68, v77
v_mul_f32_e32 v83, v82, v77
v_mad_f32 v81, v81, v80, s42
v_mad_f32 v83, v76, v67, -v83
v_mad_f32 v84, v82, v82, s43
v_mul_f32_e32 v76, v67, v76
v_mul_f32_e32 v81, 0xbe2aaaab, v81
v_mul_f32_e32 v77, v84, v77
v_mul_f32_e32 v76, v76, v81
v_cmp_gt_f32_e32 vcc, s27, v45
v_mac_f32_e32 v76, 0x3daaaaaa, v77
v_cndmask_b32_e64 v77, 0, 1.0, vcc
v_m
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment