Created
June 12, 2017 20:20
-
-
Save singam-sanjay/27be61a184322c696203c74032c30a1d to your computer and use it in GitHub Desktop.
Changes, mainly to PPCGCodeGeneration.cpp, that highlight/output where it fails to optimize the code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/CodeGen/PPCGCodeGeneration.cpp b/lib/CodeGen/PPCGCodeGeneration.cpp
index 37f7dae..1f6cace 100644
--- a/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1419,7 +1419,7 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
Builder.SetInsertPoint(&HostInsertPoint);
Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues);
- std::string Name = "kernel_" + std::to_string(Kernel->id);
+ std::string Name = S.getFunction().getName().str() + "_kernel_" + std::to_string(Kernel->id);
Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
Value *GPUKernel = createCallGetKernel(KernelString, NameString);
@@ -1460,8 +1460,7 @@ Function *
GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
std::vector<Type *> Args;
- std::string Identifier = "kernel_" + std::to_string(Kernel->id);
-
+ std::string Identifier = S.getFunction().getName().str() + "_kernel_" + std::to_string(Kernel->id);
for (long i = 0; i < Prog->n_array; i++) {
if (!ppcg_kernel_requires_array_argument(Kernel, i))
continue;
@@ -1666,8 +1665,10 @@ void GPUNodeBuilder::finalizeKernelArguments(ppcg_kernel *Kernel) {
/// code might be incorrect, if we only store at the end of the kernel.
/// To support this case we need to store these scalars back at each
/// memory store or at least before each kernel barrier.
- if (Kernel->n_block != 0 || Kernel->n_grid != 0)
+ if (Kernel->n_block != 0 || Kernel->n_grid != 0) {
BuildSuccessful = 0;
+ llvm::errs() << ":( StoredScalar problem.\n";
+ }
}
void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) {
@@ -1810,10 +1811,12 @@ std::string GPUNodeBuilder::createKernelASM() {
}
std::string GPUNodeBuilder::finalizeKernelFunction() {
- if (verifyModule(*GPUModule)) {
+ llvm::errs() << GPUModule->getName() << '\n';
+ if (verifyModule(*GPUModule, &(llvm::errs()))) {
BuildSuccessful = false;
return "";
}
+ llvm::errs() << ":) Verified OK.\n";
if (DumpKernelIR)
outs() << *GPUModule << "\n";
@@ -2662,14 +2665,20 @@ public:
/// In case a sequential kernel has more surrounding loops as any parallel
/// kernel, the SCoP is probably mostly sequential. Hence, there is no
/// point in running it on a GPU.
- if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel)
+ if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel) {
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ llvm::errs() << ":( Cost ineffective.\n";
+ }
- if (!NodeBuilder.BuildSuccessful)
+ if (!NodeBuilder.BuildSuccessful) {
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ llvm::errs() << ":( Build unsuccessful\n";
+ }
}
bool runOnScop(Scop &CurrentScop) override {
+ llvm::errs() << "PPCG got " << CurrentScop.getName() << '\n';
+
S = &CurrentScop;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -2678,8 +2687,10 @@ public:
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
// We currently do not support scops with invariant loads.
- if (S->hasInvariantAccesses())
+ if (S->hasInvariantAccesses()) {
+ llvm::errs() << ":( Has Invariant accesses.\n";
return false;
+ }
auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);
@@ -2687,6 +2698,8 @@ public:
if (PPCGGen->tree)
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
+ else
+ llvm::errs() << ":( PPCGGen->tree == NULL.\n";
freeOptions(PPCGScop);
freePPCGGen(PPCGGen);
diff --git a/tools/GPURuntime/GPUJIT.c b/tools/GPURuntime/GPUJIT.c
index 99a726e..7b21e42 100644
--- a/tools/GPURuntime/GPUJIT.c
+++ b/tools/GPURuntime/GPUJIT.c
@@ -1098,7 +1098,9 @@ static int initialDeviceAPIsCUDA() {
#pragma GCC diagnostic pop
static PollyGPUContext *initContextCUDA() {
+ //DebugMode = 1;
dump_function();
+ //DebugMode = 0;
PollyGPUContext *Context;
CUdevice Device;
@@ -1261,7 +1263,7 @@ static PollyGPUFunction *getKernelCUDA(const char *BinaryBuffer,
((CUDAKernel *)Function->Kernel)->CudaModule,
KernelName);
if (Res != CUDA_SUCCESS) {
- fprintf(stderr, "Loading kernel function failed.\n");
+ fprintf(stderr, "Loading kernel function failed :: %i\n", Res);
exit(-1);
}
@@ -1339,16 +1341,16 @@ static void freeDeviceMemoryCUDA(PollyGPUDevicePtr *Allocation) {
}
static PollyGPUDevicePtr *allocateMemoryForDeviceCUDA(long MemSize) {
- dump_function();
+ dump_function();//fprintf(stderr, "MemSize = %li\n", MemSize);
PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr));
if (DevData == 0) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n", __LINE__);
exit(-1);
}
DevData->DevicePtr = (CUDADevicePtr *)malloc(sizeof(CUDADevicePtr));
if (DevData->DevicePtr == 0) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n", __LINE__);
exit(-1);
}
@@ -1356,7 +1358,7 @@ static PollyGPUDevicePtr *allocateMemoryForDeviceCUDA(long MemSize) {
CuMemAllocFcnPtr(&(((CUDADevicePtr *)DevData->DevicePtr)->Cuda), MemSize);
if (Res != CUDA_SUCCESS) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n Reason: %i\n", __LINE__, Res);
exit(-1);
}
@@ -1390,7 +1392,7 @@ static void freeContextCUDA(PollyGPUContext *Context) {
/******************************************************************************/
PollyGPUContext *polly_initContext() {
- DebugMode = getenv("POLLY_DEBUG") != 0;
+ DebugMode = getenv("POLLY_DEBUG") != 0;//1;
CacheMode = getenv("POLLY_NOCACHE") == 0;
dump_function();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment