Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save hughperkins/ff9db47558c86a7a265341ffb5168bd2 to your computer and use it in GitHub Desktop.
Save hughperkins/ff9db47558c86a7a265341ffb5168bd2 to your computer and use it in GitHub Desktop.
// origKernelName: _ZN10tensorflow12_GLOBAL__N_113SplitOpKernelIfEEvPKT_iiiNS_21CudaDeviceArrayStructIPS2_Li8EEE
// uniqueKernelName: _ZN10tensorflow12_GLOBAL__N_113SplitOpKernelIfEEvPKT_iiiNS_21CudaDeviceArrayStructIPS2_Li8EEE_1_2
// shortKernelName: _ZN10tensorflow12_GL
/*
 * Layout of the pointer-table argument that the kernel in this file reads
 * from global memory.
 * NOTE(review): the mangled kernel name suggests this mirrors
 * tensorflow::CudaDeviceArrayStruct<float *, 8> -- confirm against the
 * CUDA source.
 */
struct tensorflow__CudaDeviceArrayStruct {
    /* The kernel compares this with 9 to choose between f1 and f2, and
     * divides its middle extent by it; presumably the number of table
     * entries -- TODO confirm. */
    int f0;

    /* Inline table of eight 64-bit slots; used by the kernel when f0 < 9. */
    unsigned long f1[8];

    /* Out-of-line table; the kernel loads this field when f0 >= 9. */
    global float **f2;
};
/*
 * Per-invocation bundle of kernel arguments handed to helper functions.
 * The kernel in this file fills it, in field order, from its own
 * `scratch`, `clmem0` and `clmem_vmem_offset0` arguments.
 */
struct GlobalVars {
    /* Local-memory scratch pointer passed into the kernel. */
    local int *scratch;

    /* Byte pointer that getGlobalPointer() uses as its base address. */
    global char *clmem0;

    /* Value getGlobalPointer() subtracts when rebasing a location onto clmem0. */
    unsigned long clmem_vmem_offset0;
};
/*
 * Rebase a 64-bit memory location onto the clmem0 buffer.
 *
 * vmemloc    - location to translate.
 * globalVars - supplies the base pointer (clmem0) and the offset
 *              (clmem_vmem_offset0) that is subtracted from the location.
 *
 * Returns clmem0 + vmemloc - clmem_vmem_offset0, computed in bytes and then
 * reinterpreted as a pointer to global float.
 */
inline global float *getGlobalPointer(unsigned long vmemloc, struct GlobalVars *globalVars) {
    global char *bytes = globalVars->clmem0;

    /* Same evaluation order as a single left-to-right expression:
     * add the location first, then remove the offset. */
    bytes += vmemloc;
    bytes -= globalVars->clmem_vmem_offset0;

    return (global float *)bytes;
}
kernel void _ZN10tensorflow12_GL(global char* clmem0, unsigned long clmem_vmem_offset0, global char* clmem1, unsigned long clmem_vmem_offset1, global char* clmem2, unsigned long clmem_vmem_offset2, long v9_offset, int v10, int v11, int v12, long v13_offset, local int *scratch);

/*
 * Scatters a flat float input into several destination buffers.
 *
 * The input holds v10 * v11 * v12 elements and is addressed as
 * (outer, mid, inner) with extents (v10, v11, v12).  The middle axis is cut
 * into pieces of v11 / f0 elements, where f0 comes from the pointer-table
 * struct; element (outer, mid, inner) is written to destination number
 * mid / piece at position ((mid % piece) + outer * piece) * v12 + inner.
 *
 * Arguments:
 *   clmem0, clmem_vmem_offset0 - base pointer and offset used by
 *                                getGlobalPointer() to translate stored
 *                                64-bit locations into global pointers.
 *   clmem1, v9_offset          - buffer and byte offset of the float input.
 *   clmem2, v13_offset         - buffer and byte offset of the
 *                                tensorflow__CudaDeviceArrayStruct.
 *   clmem_vmem_offset1/2       - not used by this kernel.
 *   v10, v11, v12              - outer, middle and inner extents.
 *   scratch                    - local scratch pointer; only stored into
 *                                GlobalVars here.
 *
 * Each work-item starts at its flat global index and advances by
 * get_num_groups(0) * get_local_size(0) until it passes the element count.
 *
 * NOTE(review): the out-of-line table pointer (f2) is assumed to be stored
 * as a 64-bit location that needs the same getGlobalPointer() translation as
 * the table entries themselves -- confirm against the host-side code.
 */
kernel void _ZN10tensorflow12_GL(global char* clmem0, unsigned long clmem_vmem_offset0, global char* clmem1, unsigned long clmem_vmem_offset1, global char* clmem2, unsigned long clmem_vmem_offset2, long v9_offset, int v10, int v11, int v12, long v13_offset, local int *scratch) {
    global struct tensorflow__CudaDeviceArrayStruct *v13 =
        (global struct tensorflow__CudaDeviceArrayStruct *)(clmem2 + v13_offset);
    global float *v9 = (global float *)(clmem1 + v9_offset);
    struct GlobalVars globalVars = { scratch, clmem0, clmem_vmem_offset0 };

    /* int v16 = load (getelementptr v13 ... f0) */
    int v16 = v13[0].f0;

    /*
     * Select the table of destination locations.  The table lives in global
     * memory, so it is kept as a global pointer and indexed there; indexing
     * off the address of a private variable would read unrelated private
     * memory for every entry but the first.
     */
    global unsigned long *v20;
    if (v16 < 9) {
        /* Inline table embedded in the struct. */
        v20 = &(v13[0].f1[0]);
    } else {
        /* Out-of-line table: the stored value is a location, not a usable
         * device pointer, so translate it before dereferencing. */
        v20 = (global unsigned long *)getGlobalPointer((unsigned long)v13[0].f2, &globalVars);
    }

    int v24 = (v11 * v10) * v12;   /* total number of elements */
    int v25 = v11 / v16;           /* piece length along the middle axis */

    int v27 = (int)get_local_size(0);
    int v30 = (v27 * (int)get_group_id(0)) + (int)get_local_id(0);
    if (v30 >= v24) {
        return;
    }

    int v32 = v12 * v11;                       /* elements per outer index */
    int stride = (int)get_num_groups(0) * v27; /* work-items in dimension 0 */

    for (int v35 = v30; v35 < v24; v35 += stride) {
        int outer = v35 / v32;
        int mid = (v35 % v32) / v12;
        int inner = v35 % v12;

        /* float* v43 = load (getelementptr v20, mid / piece): fetch the
         * destination's stored location, then translate it. */
        global float *v43 = getGlobalPointer(v20[(long)(mid / v25)], &globalVars);

        int v48 = (((mid % v25) + (outer * v25)) * v12) + inner;

        /* Copy the 32-bit pattern unchanged (the IR bitcasts float* to int*). */
        global int *src = (global int *)(&(v9[(long)v35]));
        global int *dst = (global int *)(&(v43[(long)v48]));
        dst[0] = src[0];
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment