Created
June 6, 2017 00:43
-
-
Save hughperkins/ff9db47558c86a7a265341ffb5168bd2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// origKernelName: _ZN10tensorflow12_GLOBAL__N_113SplitOpKernelIfEEvPKT_iiiNS_21CudaDeviceArrayStructIPS2_Li8EEE
// uniqueKernelName: _ZN10tensorflow12_GLOBAL__N_113SplitOpKernelIfEEvPKT_iiiNS_21CudaDeviceArrayStructIPS2_Li8EEE_1_2
// shortKernelName: _ZN10tensorflow12_GL
/*
 * Descriptor for a table of output buffers, read by the kernel in this file
 * from global memory.
 *
 * The kernel compares f0 against 9 to decide which table to use: the inline
 * array f1 when f0 < 9, otherwise the table referenced by f2.
 * NOTE(review): the entries appear to be translator "vmem" locations that
 * must go through getGlobalPointer() before use -- confirm with the host side.
 */
struct tensorflow__CudaDeviceArrayStruct {
    /* Number of entries; the kernel also divides its v11 argument by this. */
    int f0;
    /* Inline storage for up to 8 entries, each held as an unsigned long. */
    unsigned long f1[8];
    /* Out-of-line table, loaded by the kernel when f0 is 9 or more. */
    global float **f2;
};
/*
 * Per-invocation bundle of kernel arguments that helper functions need.
 * The kernel in this file fills it, in field order, from its `scratch`,
 * `clmem0` and `clmem_vmem_offset0` arguments.
 */
struct GlobalVars {
    /* Local-memory scratch pointer handed to the kernel. */
    local int *scratch;
    /* Base of the first global buffer; getGlobalPointer() offsets from it. */
    global char *clmem0;
    /* Value getGlobalPointer() subtracts from a location before using it. */
    unsigned long clmem_vmem_offset0;
};
/*
 * Turn a location value into a pointer inside the clmem0 buffer.
 *
 * vmemloc:    location to resolve; clmem_vmem_offset0 is subtracted from it.
 * globalVars: supplies the clmem0 base pointer and clmem_vmem_offset0.
 *
 * Returns clmem0 + vmemloc - clmem_vmem_offset0, viewed as `global float *`.
 * No range check is performed on the result.
 */
inline global float *getGlobalPointer(unsigned long vmemloc, struct GlobalVars *globalVars) {
    global char *advanced = globalVars->clmem0 + vmemloc;
    global char *resolved = advanced - globalVars->clmem_vmem_offset0;
    return (global float *)resolved;
}
kernel void _ZN10tensorflow12_GL(global char* clmem0, unsigned long clmem_vmem_offset0, global char* clmem1, unsigned long clmem_vmem_offset1, global char* clmem2, unsigned long clmem_vmem_offset2, long v9_offset, int v10, int v11, int v12, long v13_offset, local int *scratch);

/*
 * Scatter the elements of one input buffer into several output buffers.
 *
 * The input is read as a flat array of v10 * v11 * v12 32-bit elements.
 * A flat index `offset` is decomposed as
 *     i = offset / (v11 * v12)
 *     j = (offset % (v11 * v12)) / v12
 *     k = offset % v12
 * With piece = v11 / f0 (f0 read from the descriptor), the element goes to
 * output buffer number j / piece, at index
 *     ((j % piece) + i * piece) * v12 + k.
 * NOTE(review): v10/v11/v12 look like prefix/split/suffix dimension sizes of
 * a tensor split along its middle axis -- confirm against the host caller.
 *
 * Parameters:
 *   clmem0, clmem_vmem_offset0  buffer and offset used by getGlobalPointer()
 *                               to resolve output-buffer locations.
 *   clmem1, v9_offset           input data lives at clmem1 + v9_offset.
 *   clmem2, v13_offset          descriptor struct lives at clmem2 + v13_offset.
 *   clmem_vmem_offset1/2        accepted but not used by this kernel.
 *   v10, v11, v12               the three extents described above.
 *   scratch                     stored in GlobalVars; not otherwise used here.
 *
 * Each work-item starts at get_local_size(0) * get_group_id(0) +
 * get_local_id(0) and advances by get_num_groups(0) * get_local_size(0)
 * until it reaches the element count.
 *
 * A descriptor with f0 == 0 divides by zero; nothing guards against it.
 */
kernel void _ZN10tensorflow12_GL(global char* clmem0, unsigned long clmem_vmem_offset0, global char* clmem1, unsigned long clmem_vmem_offset1, global char* clmem2, unsigned long clmem_vmem_offset2, long v9_offset, int v10, int v11, int v12, long v13_offset, local int *scratch) {
    global struct tensorflow__CudaDeviceArrayStruct *outputs =
        (global struct tensorflow__CudaDeviceArrayStruct *)(clmem2 + v13_offset);
    global float *input = (global float *)(clmem1 + v9_offset);
    struct GlobalVars globalVars = { scratch, clmem0, clmem_vmem_offset0 };
    struct GlobalVars *pGlobalVars = &globalVars;

    const int num_split = outputs->f0;

    /*
     * Table of output-buffer locations. It is kept as a pointer into global
     * memory so that indexing it reads the table itself; the previous code
     * indexed the address of a private variable holding the table address.
     */
    global unsigned long *out_locations;
    if (num_split < 9) {
        /* Small counts: the table is stored inline in the descriptor. */
        out_locations = &outputs->f1[0];
    } else {
        /*
         * Large counts: the descriptor only holds the table's location.
         * NOTE(review): f2 is treated as a vmem location (the generated code
         * tagged it as such) and therefore translated before being
         * dereferenced -- confirm against the host-side allocator.
         */
        out_locations = (global unsigned long *)getGlobalPointer((unsigned long)outputs->f2, pGlobalVars);
    }

    const int total = (v11 * v10) * v12;
    const int piece = v11 / num_split;

    const int group_id = get_group_id(0);
    const int local_size = get_local_size(0);
    const int local_id = get_local_id(0);

    int offset = (local_size * group_id) + local_id;
    if (offset >= total) {
        return;
    }

    const int slab = v12 * v11;
    const int num_groups = get_num_groups(0);
    const int stride = num_groups * local_size;

    do {
        const int j = (offset % slab) / v12;

        /* Each table entry is a location that must be resolved to a pointer. */
        global float *out = getGlobalPointer(out_locations[j / piece], pGlobalVars);

        const int out_index = (((j % piece) + ((offset / slab) * piece)) * v12) + (offset % v12);

        /* Copy the raw 32-bit pattern, as the generated code did via int. */
        ((global int *)out)[out_index] = ((global int *)input)[offset];

        offset += stride;
    } while (offset < total);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment