Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hughperkins/c55bb2161a30291fd438b4c4feaf1ed2 to your computer and use it in GitHub Desktop.
Save hughperkins/c55bb2161a30291fd438b4c4feaf1ed2 to your computer and use it in GitHub Desktop.
// __vmem__ is just a marker, so a reader can see which values are vmem locations.
// It doesn't actually do anything: it expands to nothing, so the compiler ignores it.
#define __vmem__
// __vmem2__ marks a pointer to a pointer (so we have to unwrap twice).
// Like __vmem__, it expands to nothing.
#define __vmem2__
// Groups the kernel-level values that helper functions such as
// getGlobalPointer receive through a single pointer.
struct GlobalVars {
// Local-memory int pointer; the kernel in this file initializes it from its `scratch` argument.
local int *scratch;
// Base address of the global buffer that vmem locations are resolved against.
global char *clmem0;
// Amount getGlobalPointer subtracts after adding a vmem location to clmem0.
unsigned long clmem_vmem_offset0;
};
// Turns a vmem location into a pointer into the clmem0 buffer.
//   vmemloc    - location value to translate (tagged with the __vmem__ marker)
//   globalVars - supplies the buffer base (clmem0) and the offset to remove
//                (clmem_vmem_offset0)
// Returns the resulting byte address reinterpreted as `global float *`.
inline global float *getGlobalPointer(__vmem__ unsigned long vmemloc, const struct GlobalVars* const globalVars) {
    // Keep the original evaluation order: advance by the location first,
    // then step back by the vmem offset.
    global char *advanced = globalVars->clmem0 + vmemloc;
    global char *resolved = advanced - globalVars->clmem_vmem_offset0;
    return (global float *)resolved;
}
// Single-byte struct; embedded as field f0 of
// class_thrust__system__cuda__detail__bulk___detail__closure.
struct thrust__system__cuda__detail__for_each_n_detail__for_each_kernel {
char f0;
};
// Innermost level of the cursor nesting: a single char.
// Wrapped by cursor_28.
struct thrust__system__cuda__detail__bulk___detail__cursor_29 {
char f0;
};
// Wrapper whose only member is a cursor_29; itself wrapped by cursor_27.
struct thrust__system__cuda__detail__bulk___detail__cursor_28 {
struct thrust__system__cuda__detail__bulk___detail__cursor_29 f0;
};
// Wrapper whose only member is a cursor_28; itself wrapped by the
// unsuffixed cursor struct.
struct thrust__system__cuda__detail__bulk___detail__cursor_27 {
struct thrust__system__cuda__detail__bulk___detail__cursor_28 f0;
};
// "_nopointers" counterpart of the for_each_kernel struct, with the same
// single-char layout. Embedded as field f0 of the closure_nopointers struct.
struct thrust__system__cuda__detail__for_each_n_detail__for_each_kernel_nopointers {
char f0;
};
// Outermost cursor wrapper (cursor -> cursor_27 -> cursor_28 -> cursor_29 -> char).
// Used as field f0 of thrust__detail__cons.
struct thrust__system__cuda__detail__bulk___detail__cursor {
struct thrust__system__cuda__detail__bulk___detail__cursor_27 f0;
};
// Holds the one pointer member in the whole task layout.
// The kernel in this file stores its resolved output pointer here and later
// reads it back as the base address for its element writes.
struct class_thrust__iterator_adaptor {
global int* f0;
};
// Wrapper whose only member is a class_thrust__iterator_adaptor;
// itself wrapped by class_thrust__device_ptr.
struct class_thrust__pointer {
struct class_thrust__iterator_adaptor f0;
};
// Wrapper whose only member is a class_thrust__pointer
// (device_ptr -> pointer -> iterator_adaptor -> global int*).
// Used as field f0 of thrust__detail__cons_33.
struct class_thrust__device_ptr {
struct class_thrust__pointer f0;
};
// Single-int struct. The kernel in this file reads f0 and stores that value
// into every output element it writes.
struct thrust__detail__fill_functor {
int f0;
};
// Wrapper whose only member is a fill_functor; itself wrapped by
// thrust__detail__wrapped_function.
struct thrust__detail__device_generate_functor {
struct thrust__detail__fill_functor f0;
};
// "_nopointers" counterpart of cursor_29, with the same single-char layout.
// Wrapped by cursor_28_nopointers.
struct thrust__system__cuda__detail__bulk___detail__cursor_29_nopointers {
char f0;
};
// "_nopointers" counterpart of cursor_28: wraps a cursor_29_nopointers.
struct thrust__system__cuda__detail__bulk___detail__cursor_28_nopointers {
struct thrust__system__cuda__detail__bulk___detail__cursor_29_nopointers f0;
};
// "_nopointers" counterpart of cursor_27: wraps a cursor_28_nopointers.
struct thrust__system__cuda__detail__bulk___detail__cursor_27_nopointers {
struct thrust__system__cuda__detail__bulk___detail__cursor_28_nopointers f0;
};
// "_nopointers" counterpart of the cursor struct: wraps a cursor_27_nopointers.
// Used as field f0 of thrust__detail__cons_nopointers.
struct thrust__system__cuda__detail__bulk___detail__cursor_nopointers {
struct thrust__system__cuda__detail__bulk___detail__cursor_27_nopointers f0;
};
// Wrapper whose only member is a device_generate_functor
// (wrapped_function -> device_generate_functor -> fill_functor -> int).
// Used as field f0 of thrust__detail__cons_34.
struct thrust__detail__wrapped_function {
struct thrust__detail__device_generate_functor f0;
};
// Single-int struct. The kernel in this file reads f0 as the exclusive upper
// bound that its element index is compared against.
struct thrust__detail__cons_35 {
int f0;
};
// Pair of a wrapped_function (f0) followed by a cons_35 (f1).
// Used as field f1 of thrust__detail__cons_33.
struct thrust__detail__cons_34 {
struct thrust__detail__wrapped_function f0;
struct thrust__detail__cons_35 f1;
};
// Pair of a class_thrust__device_ptr (f0) followed by a cons_34 (f1).
// Used as field f1 of thrust__detail__cons.
struct thrust__detail__cons_33 {
struct class_thrust__device_ptr f0;
struct thrust__detail__cons_34 f1;
};
// "_nopointers" counterpart of class_thrust__iterator_adaptor: the
// `global int*` member is replaced by a plain int. The kernel in this file
// never reads this int; it fills the pointer from a separate offset argument.
struct class_thrust__iterator_adaptor_nopointers {
int f0;
};
// "_nopointers" counterpart of class_thrust__pointer: wraps an
// iterator_adaptor_nopointers.
struct class_thrust__pointer_nopointers {
struct class_thrust__iterator_adaptor_nopointers f0;
};
// "_nopointers" counterpart of class_thrust__device_ptr: wraps a
// pointer_nopointers. Used as field f0 of thrust__detail__cons_33_nopointers.
struct class_thrust__device_ptr_nopointers {
struct class_thrust__pointer_nopointers f0;
};
// Head of the cons chain: a cursor (f0) followed by a cons_33 (f1).
// Sole member of class_thrust__tuple.
struct thrust__detail__cons {
struct thrust__system__cuda__detail__bulk___detail__cursor f0;
struct thrust__detail__cons_33 f1;
};
// Wrapper whose only member is a thrust__detail__cons.
// Used as field f1 of the closure struct.
struct class_thrust__tuple {
struct thrust__detail__cons f0;
};
// Pair of a for_each_kernel (f0) followed by a class_thrust__tuple (f1).
// Used as field f0 of the task_base struct.
struct class_thrust__system__cuda__detail__bulk___detail__closure {
struct thrust__system__cuda__detail__for_each_n_detail__for_each_kernel f0;
struct class_thrust__tuple f1;
};
// Single-int struct; embedded as field f0 of group_base_37.
struct class_thrust__system__cuda__detail__bulk___agent {
int f0;
};
// "_nopointers" counterpart of fill_functor, with the same single-int layout.
// The kernel in this file copies f0 from here into its private fill_functor.
struct thrust__detail__fill_functor_nopointers {
int f0;
};
// "_nopointers" counterpart of device_generate_functor: wraps a
// fill_functor_nopointers.
struct thrust__detail__device_generate_functor_nopointers {
struct thrust__detail__fill_functor_nopointers f0;
};
// "_nopointers" counterpart of wrapped_function: wraps a
// device_generate_functor_nopointers. Used as field f0 of cons_34_nopointers.
struct thrust__detail__wrapped_function_nopointers {
struct thrust__detail__device_generate_functor_nopointers f0;
};
// Inner group_base: an agent (f0) followed by two ints (f1, f2).
// Sole member of parallel_group_36. The kernel in this file copies all three
// fields but does not read them afterwards.
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base_37 {
struct class_thrust__system__cuda__detail__bulk___agent f0;
int f1;
int f2;
};
// "_nopointers" counterpart of cons_35, with the same single-int layout.
struct thrust__detail__cons_35_nopointers {
int f0;
};
// "_nopointers" counterpart of cons_34: a wrapped_function_nopointers (f0)
// followed by a cons_35_nopointers (f1).
struct thrust__detail__cons_34_nopointers {
struct thrust__detail__wrapped_function_nopointers f0;
struct thrust__detail__cons_35_nopointers f1;
};
// "_nopointers" counterpart of cons_33: a device_ptr_nopointers (f0)
// followed by a cons_34_nopointers (f1).
struct thrust__detail__cons_33_nopointers {
struct class_thrust__device_ptr_nopointers f0;
struct thrust__detail__cons_34_nopointers f1;
};
// "_nopointers" counterpart of thrust__detail__cons: a cursor_nopointers (f0)
// followed by a cons_33_nopointers (f1).
struct thrust__detail__cons_nopointers {
struct thrust__system__cuda__detail__bulk___detail__cursor_nopointers f0;
struct thrust__detail__cons_33_nopointers f1;
};
// "_nopointers" counterpart of class_thrust__tuple: wraps a cons_nopointers.
struct class_thrust__tuple_nopointers {
struct thrust__detail__cons_nopointers f0;
};
// "_nopointers" counterpart of the closure struct: a
// for_each_kernel_nopointers (f0) followed by a tuple_nopointers (f1).
struct class_thrust__system__cuda__detail__bulk___detail__closure_nopointers {
struct thrust__system__cuda__detail__for_each_n_detail__for_each_kernel_nopointers f0;
struct class_thrust__tuple_nopointers f1;
};
// Wrapper whose only member is a group_base_37.
// Used as field f0 of the concurrent_group struct.
struct class_thrust__system__cuda__detail__bulk___parallel_group_36 {
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base_37 f0;
};
// A parallel_group_36 (f0) followed by an int (f1).
// The kernel in this file has the work-item with local id 0 widen f1 to long
// and store it at byte offset 16 of the s_on_chip_allocator object.
struct class_thrust__system__cuda__detail__bulk___concurrent_group {
struct class_thrust__system__cuda__detail__bulk___parallel_group_36 f0;
int f1;
};
// Outer group_base: a concurrent_group (f0) followed by two ints (f1, f2).
// The kernel in this file multiplies f1 by the local work-group size to get
// the step between successive elements written by one work-item.
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base {
struct class_thrust__system__cuda__detail__bulk___concurrent_group f0;
int f1;
int f2;
};
// Wrapper whose only member is the outer group_base.
// Used as field f1 of the task_base struct.
struct class_thrust__system__cuda__detail__bulk___parallel_group {
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base f0;
};
// Pair of a closure (f0) followed by a parallel_group (f1).
// Used as field f0 of the cuda_task struct.
struct class_thrust__system__cuda__detail__bulk___detail__task_base {
struct class_thrust__system__cuda__detail__bulk___detail__closure f0;
struct class_thrust__system__cuda__detail__bulk___parallel_group f1;
};
// Top-level task layout with the pointer member present. The kernel in this
// file builds a private instance of it from the "_nopointers" variant.
struct class_thrust__system__cuda__detail__bulk___detail__cuda_task {
struct class_thrust__system__cuda__detail__bulk___detail__task_base f0;
// Added to the work-group id when the kernel computes a work-item's first index.
int f1;
// Four trailing bytes; the kernel copies them but does not read them.
// NOTE(review): looks like tail padding — confirm.
char f2[4];
};
// "_nopointers" counterpart of the agent struct, with the same single-int layout.
struct class_thrust__system__cuda__detail__bulk___agent_nopointers {
int f0;
};
// "_nopointers" counterpart of group_base_37: an agent_nopointers (f0)
// followed by two ints (f1, f2).
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base_37_nopointers {
struct class_thrust__system__cuda__detail__bulk___agent_nopointers f0;
int f1;
int f2;
};
// "_nopointers" counterpart of parallel_group_36: wraps a
// group_base_37_nopointers.
struct class_thrust__system__cuda__detail__bulk___parallel_group_36_nopointers {
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base_37_nopointers f0;
};
// "_nopointers" counterpart of concurrent_group: a
// parallel_group_36_nopointers (f0) followed by an int (f1).
struct class_thrust__system__cuda__detail__bulk___concurrent_group_nopointers {
struct class_thrust__system__cuda__detail__bulk___parallel_group_36_nopointers f0;
int f1;
};
// "_nopointers" counterpart of the outer group_base: a
// concurrent_group_nopointers (f0) followed by two ints (f1, f2).
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base_nopointers {
struct class_thrust__system__cuda__detail__bulk___concurrent_group_nopointers f0;
int f1;
int f2;
};
// "_nopointers" counterpart of parallel_group: wraps a group_base_nopointers.
struct class_thrust__system__cuda__detail__bulk___parallel_group_nopointers {
struct class_thrust__system__cuda__detail__bulk___detail__group_detail__group_base_nopointers f0;
};
// "_nopointers" counterpart of task_base: a closure_nopointers (f0)
// followed by a parallel_group_nopointers (f1).
struct class_thrust__system__cuda__detail__bulk___detail__task_base_nopointers {
struct class_thrust__system__cuda__detail__bulk___detail__closure_nopointers f0;
struct class_thrust__system__cuda__detail__bulk___parallel_group_nopointers f1;
};
// "_nopointers" counterpart of cuda_task. The kernel in this file reads an
// instance of this struct from clmem0 at byte offset v8_nopointers_offset and
// copies its non-pointer fields into a private cuda_task.
struct class_thrust__system__cuda__detail__bulk___detail__cuda_task_nopointers {
struct class_thrust__system__cuda__detail__bulk___detail__task_base_nopointers f0;
int f1;
char f2[4];
};
kernel void _ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEjNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_(global char* clmem0, unsigned long clmem_vmem_offset0, long v8_nopointers_offset, long v8_ptr0_offset, local int *scratch);
// Fill kernel: every work-item stores one int value into a strided sequence
// of elements of an int buffer until its index reaches an upper bound.
//
//   clmem0               - base of the global buffer holding both the task
//                          description and the output ints
//   clmem_vmem_offset0   - not used by this kernel body
//   v8_nopointers_offset - byte offset in clmem0 of the cuda_task_nopointers
//                          instance describing the work
//   v8_ptr0_offset       - byte offset in clmem0 of the first output int
//   scratch              - not used by this kernel body
kernel void _ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEjNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_(global char* clmem0, unsigned long clmem_vmem_offset0, long v8_nopointers_offset, long v8_ptr0_offset, local int *scratch) {
    // Zero-length local array; only its address is used (stored into the
    // allocator object below).
    local int _ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE[0];

    // Both offsets are byte offsets relative to clmem0.
    global int* out_base = (global int*)(clmem0 + v8_ptr0_offset);
    global struct class_thrust__system__cuda__detail__bulk___detail__cuda_task_nopointers* packed = (global struct class_thrust__system__cuda__detail__bulk___detail__cuda_task_nopointers*)(clmem0 + v8_nopointers_offset);

    // Rebuild the pointer-carrying task in a private copy: every scalar comes
    // from the pointer-free instance, the single pointer member comes from
    // out_base.
    struct class_thrust__system__cuda__detail__bulk___detail__cuda_task task;
    task.f0.f0.f0.f0 = packed->f0.f0.f0.f0;
    task.f0.f0.f1.f0.f0.f0.f0.f0.f0 = packed->f0.f0.f1.f0.f0.f0.f0.f0.f0;
    task.f0.f0.f1.f0.f1.f0.f0.f0.f0 = out_base;
    task.f0.f0.f1.f0.f1.f1.f0.f0.f0.f0 = packed->f0.f0.f1.f0.f1.f1.f0.f0.f0.f0;
    task.f0.f0.f1.f0.f1.f1.f1.f0 = packed->f0.f0.f1.f0.f1.f1.f1.f0;
    task.f0.f1.f0.f0.f0.f0.f0.f0 = packed->f0.f1.f0.f0.f0.f0.f0.f0;
    task.f0.f1.f0.f0.f0.f0.f1 = packed->f0.f1.f0.f0.f0.f0.f1;
    task.f0.f1.f0.f0.f0.f0.f2 = packed->f0.f1.f0.f0.f0.f0.f2;
    task.f0.f1.f0.f0.f1 = packed->f0.f1.f0.f0.f1;
    task.f0.f1.f0.f1 = packed->f0.f1.f0.f1;
    task.f0.f1.f0.f2 = packed->f0.f1.f0.f2;
    task.f1 = packed->f1;
    for (int byte = 0; byte < 4; ++byte) {
        task.f2[byte] = packed->f2[byte];
    }

    const int stride_factor = task.f0.f1.f0.f1;
    const int local_size = get_local_size(0);
    const int local_id = get_local_id(0);
    const int group_bias = task.f1;
    const int group_id = get_group_id(0);

    // The work-item with local id 0 initializes the allocator object: an int
    // at byte 0, a local pointer at byte 8 and a long at byte 16.
    // NOTE(review): `..._s_on_chip_allocatorE` is not declared anywhere in
    // this file; its declaration must come from elsewhere for the kernel to
    // compile — confirm.
    if (local_id == 0) {
        const long allocator_arg = (long)task.f0.f1.f0.f0.f1;
        ((local int*)_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE)[0] = 0;
        ((local char**)(&(_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE[0].f0.f0[8])))[0] = (local char*)_ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE;
        ((local long*)(&(_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE[0].f0.f0[16])))[0] = allocator_arg;
    }
    // Reached by every work-item, whether or not it took the branch above.
    barrier(CLK_LOCAL_MEM_FENCE);

    const int fill_value = task.f0.f0.f1.f0.f1.f1.f0.f0.f0.f0;
    const int limit = task.f0.f0.f1.f0.f1.f1.f1.f0;
    const int stride = local_size * stride_factor;
    int index = ((group_id + group_bias) * local_size) + local_id;

    // Nothing to write when the first index is already at or past the bound.
    if (index >= limit) {
        return;
    }

    // Write one element, then advance the index and the destination pointer
    // by the same stride until the index reaches the bound.
    global int* dest = &(task.f0.f0.f1.f0.f1.f0.f0.f0.f0[(long)index]);
    do {
        dest[0] = fill_value;
        index += stride;
        dest += (long)stride;
    } while (index < limit);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment