Created
June 12, 2017 20:20
-
-
Save singam-sanjay/27be61a184322c696203c74032c30a1d to your computer and use it in GitHub Desktop.
Changes, mainly to PPCGCodeGeneration.cpp, that highlight/output where it fails to optimize the code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/CodeGen/PPCGCodeGeneration.cpp b/lib/CodeGen/PPCGCodeGeneration.cpp
index 37f7dae..1f6cace 100644
--- a/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1419,7 +1419,7 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
Builder.SetInsertPoint(&HostInsertPoint);
Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues);
- std::string Name = "kernel_" + std::to_string(Kernel->id);
+ std::string Name = S.getFunction().getName().str() + "_kernel_" + std::to_string(Kernel->id);
Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
Value *GPUKernel = createCallGetKernel(KernelString, NameString);
@@ -1460,8 +1460,7 @@ Function *
GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
std::vector<Type *> Args;
- std::string Identifier = "kernel_" + std::to_string(Kernel->id);
-
+ std::string Identifier = S.getFunction().getName().str() + "_kernel_" + std::to_string(Kernel->id);
for (long i = 0; i < Prog->n_array; i++) {
if (!ppcg_kernel_requires_array_argument(Kernel, i))
continue;
@@ -1666,8 +1665,10 @@ void GPUNodeBuilder::finalizeKernelArguments(ppcg_kernel *Kernel) {
/// code might be incorrect, if we only store at the end of the kernel.
/// To support this case we need to store these scalars back at each
/// memory store or at least before each kernel barrier.
- if (Kernel->n_block != 0 || Kernel->n_grid != 0)
+ if (Kernel->n_block != 0 || Kernel->n_grid != 0) {
BuildSuccessful = 0;
+ llvm::errs() << ":( StoredScalar problem.\n";
+ }
}
void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) {
@@ -1810,10 +1811,12 @@ std::string GPUNodeBuilder::createKernelASM() {
}
std::string GPUNodeBuilder::finalizeKernelFunction() {
- if (verifyModule(*GPUModule)) {
+ llvm::errs() << GPUModule->getName() << '\n';
+ if (verifyModule(*GPUModule, &(llvm::errs()))) {
BuildSuccessful = false;
return "";
}
+ llvm::errs() << ":) Verified OK.\n";
if (DumpKernelIR)
outs() << *GPUModule << "\n";
@@ -2662,14 +2665,20 @@ public:
/// In case a sequential kernel has more surrounding loops as any parallel
/// kernel, the SCoP is probably mostly sequential. Hence, there is no
/// point in running it on a GPU.
- if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel)
+ if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel) {
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ llvm::errs() << ":( Cost ineffective.\n";
+ }
- if (!NodeBuilder.BuildSuccessful)
+ if (!NodeBuilder.BuildSuccessful) {
SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ llvm::errs() << ":( Build unsuccessful\n";
+ }
}
bool runOnScop(Scop &CurrentScop) override {
+ llvm::errs() << "PPCG got " << CurrentScop.getName() << '\n';
+
S = &CurrentScop;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -2678,8 +2687,10 @@ public:
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
// We currently do not support scops with invariant loads.
- if (S->hasInvariantAccesses())
+ if (S->hasInvariantAccesses()) {
+ llvm::errs() << ":( Has Invariant accesses.\n";
return false;
+ }
auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);
@@ -2687,6 +2698,8 @@ public:
if (PPCGGen->tree)
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
+ else
+ llvm::errs() << ":( PPCGGen->tree == NULL.\n";
freeOptions(PPCGScop);
freePPCGGen(PPCGGen);
diff --git a/tools/GPURuntime/GPUJIT.c b/tools/GPURuntime/GPUJIT.c
index 99a726e..7b21e42 100644
--- a/tools/GPURuntime/GPUJIT.c
+++ b/tools/GPURuntime/GPUJIT.c
@@ -1098,7 +1098,9 @@ static int initialDeviceAPIsCUDA() {
#pragma GCC diagnostic pop
static PollyGPUContext *initContextCUDA() {
+ //DebugMode = 1;
dump_function();
+ //DebugMode = 0;
PollyGPUContext *Context;
CUdevice Device;
@@ -1261,7 +1263,7 @@ static PollyGPUFunction *getKernelCUDA(const char *BinaryBuffer,
((CUDAKernel *)Function->Kernel)->CudaModule,
KernelName);
if (Res != CUDA_SUCCESS) {
- fprintf(stderr, "Loading kernel function failed.\n");
+ fprintf(stderr, "Loading kernel function failed :: %i\n", Res);
exit(-1);
}
@@ -1339,16 +1341,16 @@ static void freeDeviceMemoryCUDA(PollyGPUDevicePtr *Allocation) {
}
static PollyGPUDevicePtr *allocateMemoryForDeviceCUDA(long MemSize) {
- dump_function();
+ dump_function();//fprintf(stderr, "MemSize = %li\n", MemSize);
PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr));
if (DevData == 0) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n", __LINE__);
exit(-1);
}
DevData->DevicePtr = (CUDADevicePtr *)malloc(sizeof(CUDADevicePtr));
if (DevData->DevicePtr == 0) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n", __LINE__);
exit(-1);
}
@@ -1356,7 +1358,7 @@ static PollyGPUDevicePtr *allocateMemoryForDeviceCUDA(long MemSize) {
CuMemAllocFcnPtr(&(((CUDADevicePtr *)DevData->DevicePtr)->Cuda), MemSize);
if (Res != CUDA_SUCCESS) {
- fprintf(stderr, "Allocate memory for GPU device memory pointer failed.\n");
+ fprintf(stderr, "%i : Allocate memory for GPU device memory pointer failed.\n Reason: %i\n", __LINE__, Res);
exit(-1);
}
@@ -1390,7 +1392,7 @@ static void freeContextCUDA(PollyGPUContext *Context) {
/******************************************************************************/
PollyGPUContext *polly_initContext() {
- DebugMode = getenv("POLLY_DEBUG") != 0;
+ DebugMode = getenv("POLLY_DEBUG") != 0;//1;
CacheMode = getenv("POLLY_NOCACHE") == 0;
dump_function();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment