Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save singam-sanjay/27be61a184322c696203c74032c30a1d to your computer and use it in GitHub Desktop.
Changes, mainly to PPCGCodeGeneration.cpp, that highlight/output where Polly's GPU code generation fails to optimize the code.
diff --git a/lib/CodeGen/PPCGCodeGeneration.cpp b/lib/CodeGen/PPCGCodeGeneration.cpp
index 37f7dae..1f6cace 100644
--- a/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1419,7 +1419,7 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
Builder.SetInsertPoint(&HostInsertPoint);
Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues);
- std::string Name = "kernel_" + std::to_string(Kernel->id);
+ std::string Name = S.getFunction().getName().str() + "_kernel_" + std::to_string(Kernel->id);
Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
Value *GPUKernel = createCallGetKernel(KernelString, NameString);
@@ -1460,8 +1460,7 @@ Function *
GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
std::vector<Type *> Args;
- std::string Identifier = "kernel_" + std::to_string(Kernel->id);
-
+ std::string Identifier = S.getFunction().getName().str() + "_kernel_" + std::to_string(Kernel->id);
for (long i = 0; i < Prog->n_array; i++) {
if (!ppcg_kernel_requires_array_argument(Kernel, i))
continue;
@@ -1666,8 +1665,10 @@ void GPUNodeBuilder::finalizeKernelArguments(ppcg_kernel *Kernel) {
/// code might be incorrect, if we only store at the end of the kernel.
/// To support this case we need to store these scalars back at each
/// memory store or at least before each kernel barrier.
- if (Kernel->n_block != 0 || Kernel->n_grid != 0)
+ if (Kernel->n_block != 0 || Kernel->n_grid != 0) {
BuildSuccessful = 0;
+ llvm::errs() << ":( StoredScalar problem.\n";
+ }
}
void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) {
@@ -1810,10 +1811,12 @@ std::string GPUNodeBuilder::createKernelASM() {
}
std::string GPUNodeBuilder::finalizeKernelFunction() {
- if (verifyModule(*GPUModule)) {
+ llvm::errs() << GPUModule->getName() << '\n';
+ if (verifyModule(*GPUModule, &(llvm::errs()))) {
BuildSuccessful = false;
return "";
}
+ llvm::errs() << ":) Verified OK.\n";
if (DumpKernelIR)
outs() << *GPUModule << "\n";
@@ -2662,14 +2665,20 @@ public:
/// In case a sequential kernel has more surrounding loops as any parallel
/// kernel, the SCoP is probably mostly sequential. Hence, there is no
/// point in running it on a GPU.
- if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel)
+ if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel) {
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ llvm::errs() << ":( Cost ineffective.\n";
+ }
- if (!NodeBuilder.BuildSuccessful)
+ if (!NodeBuilder.BuildSuccessful) {
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ llvm::errs() << ":( Build unsuccessful\n";
+ }
}
bool runOnScop(Scop &CurrentScop) override {
+ llvm::errs() << "PPCG got " << CurrentScop.getName() << '\n';
+
S = &CurrentScop;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -2678,8 +2687,10 @@ public:
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
// We currently do not support scops with invariant loads.
- if (S->hasInvariantAccesses())
+ if (S->hasInvariantAccesses()) {
+ llvm::errs() << ":( Has Invariant accesses.\n";
return false;
+ }
auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);
@@ -2687,6 +2698,8 @@ public:
if (PPCGGen->tree)
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
+ else
+ llvm::errs() << ":( PPCGGen->tree == NULL.\n";
freeOptions(PPCGScop);
freePPCGGen(PPCGGen);
diff --git a/tools/GPURuntime/GPUJIT.c b/tools/GPURuntime/GPUJIT.c
index 99a726e..7b21e42 100644
--- a/tools/GPURuntime/GPUJIT.c
+++ b/tools/GPURuntime/GPUJIT.c
@@ -1098,7 +1098,9 @@ static int initialDeviceAPIsCUDA() {
#pragma GCC diagnostic pop
static PollyGPUContext *initContextCUDA() {
+ //DebugMode = 1;
dump_function();
+ //DebugMode = 0;
PollyGPUContext *Context;
CUdevice Device;
@@ -1261,7 +1263,7 @@ static PollyGPUFunction *getKernelCUDA(const char *BinaryBuffer,
((CUDAKernel *)Function->Kernel)->CudaModule,
KernelName);
if (Res != CUDA_SUCCESS) {
- fprintf(stderr, "Loading kernel function failed.\n");
+ fprintf(stderr, "Loading kernel function failed :: %i\n", Res);
exit(-1);
}
@@ -1339,16 +1341,16 @@ static void freeDeviceMemoryCUDA(PollyGPUDevicePtr *Allocation) {
}
static PollyGPUDevicePtr *allocateMemoryForDeviceCUDA(long MemSize) {
- dump_function();
+ dump_function();//fprintf(stderr, "MemSize = %li\n", MemSize);
PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr));
if (DevData == 0) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n", __LINE__);
exit(-1);
}
DevData->DevicePtr = (CUDADevicePtr *)malloc(sizeof(CUDADevicePtr));
if (DevData->DevicePtr == 0) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n", __LINE__);
exit(-1);
}
@@ -1356,7 +1358,7 @@ static PollyGPUDevicePtr *allocateMemoryForDeviceCUDA(long MemSize) {
CuMemAllocFcnPtr(&(((CUDADevicePtr *)DevData->DevicePtr)->Cuda), MemSize);
if (Res != CUDA_SUCCESS) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n Reason: %i\n", __LINE__, Res);
exit(-1);
}
@@ -1390,7 +1392,7 @@ static void freeContextCUDA(PollyGPUContext *Context) {
/******************************************************************************/
PollyGPUContext *polly_initContext() {
- DebugMode = getenv("POLLY_DEBUG") != 0;
+ DebugMode = getenv("POLLY_DEBUG") != 0;//1;
CacheMode = getenv("POLLY_NOCACHE") == 0;
dump_function();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment