Skip to content

Instantly share code, notes, and snippets.

View jdoerfert's full-sized avatar

Johannes Doerfert jdoerfert

  • Lawrence Livermore National Laboratory
  • Bay Area, CA, USA
  • X @llvm_dev
View GitHub Profile
; __CLANG_OFFLOAD_BUNDLE____START__ openmp-nvptx64-nvida-cuda
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvida-cuda"
%class.omptarget_nvptx_ThreadPrivateContext = type { %class.omptarget_nvptx_TeamDescr, [1024 x %class.omptarget_nvptx_TaskDescr], [1024 x %class.omptarget_nvptx_TaskDescr*], %union.anon, [1024 x i32], [1024 x i64], [1024 x i64], [1024 x i64], [1024 x i64], i64, [8 x i8] }
%class.omptarget_nvptx_TeamDescr = type { %class.omptarget_nvptx_TaskDescr, %class.omptarget_nvptx_WorkDescr, i64, [8 x i8], [32 x %struct.__kmpc_data_sharing_worker_slot_static], [1 x %struct.__kmpc_data_sharing_master_slot_static] }
%class.omptarget_nvptx_TaskDescr = type { %"struct.omptarget_nvptx_TaskDescr::SavedLoopDescr_items", %"struct.omptarget_nvptx_TaskDescr::TaskDescr_items", %class.omptarget_nvptx_TaskDescr* }
%"struct.omptarget_nvptx_TaskDescr::SavedLoopDescr_items" = type { i64, i64, i64, i64, i32 }
#include <stdio.h>
int main() {
for (int i = 0; i < 1000; ++i) {
int Count = 0;
#pragma omp target parallel for reduction(+: Count) schedule(dynamic, 2) num_threads(64)
for (int J = 0; J < 1000; ++J) {
Count += J;
}
CHANGED: adce NumBranchesRemoved 560 -> 538 ( -3.929%)
CHANGED: argpromotion NumAggregatesPromoted 1446 -> 16305 ( +1027.593%)
CHANGED: argpromotion NumArgumentsDead 10 -> 8073 (+80630.000%)
CHANGED: argpromotion NumArgumentsPromoted 60 -> 1785 ( +2875.000%)
CHANGED: argpromotion NumByValArgsPromoted 14 -> 27 ( +92.857%)
CHANGED: attributor NumAttributesManifested 30076 -> 133400 ( +343.543%)
CHANGED: attributor NumAttributesValidFixpoint 60246 -> 210087 ( +248.715%)
ADDED: attributor NumAttrsRequiredDeepWrapper n/a -> 22256
CHANGED: attributor NumFnArgumentNoCapture 29011 -> 111490 ( +284.303%)
CHANGED: attributor N
CHANGED: SLP NumVectorInstructions 28566 -> 30799 ( +7.817%)
CHANGED: adce NumBranchesRemoved 560 -> 1512 ( +170.000%)
CHANGED: adce NumRemoved 2198 -> 9096 ( +313.831%)
CHANGED: argpromotion NumAggregatesPromoted 1446 -> 16870 ( +1066.667%)
CHANGED: argpromotion NumArgumentsDead 10 -> 7308 (+72980.000%)
CHANGED: argpromotion NumArgumentsPromoted 60 -> 1810 ( +2916.667%)
CHANGED: argpromotion NumByValArgsPromoted 14 -> 27 ( +92.857%)
CHANGED: asm-printer EmittedInsts 5779646 -> 5717497 ( -1.075%)
CHANGED: assembler EmittedAlignFragments 259544 -> 252360 ( -2.768%)
CHANGED: assembler
// header.h
// Polymorphic base type exposing a single overridable hook, foo().
struct Base {
  // Virtual destructor so that destroying a derived object through a Base
  // pointer or reference is well-defined.
  virtual ~Base() = default;

  // Default implementation does nothing; derived types may override it.
  virtual void foo() {}
};
// Class template deriving publicly from Base. It declares its own foo(),
// which overrides Base::foo(); only the declaration appears here.
template <typename A>
struct S : public Base {
  // `override` makes the compiler verify this really overrides Base::foo().
  void foo() override;
};
@jdoerfert
jdoerfert / gist:412258b11aa99c9c8eb6835563e71363
Created October 15, 2019 16:34
struct/class definitions and getAAfor calls the issue
struct AANoUnwindImpl : AANoUnwind {
A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(ICS));
struct AANoUnwindFunction final : public AANoUnwindImpl {
struct AANoUnwindCallSite final : AANoUnwindImpl {
auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
const auto &RetValAA = A.getAAFor<AAReturnedValues>(
struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
# Run grep -E 'class|struct AA|public AA|private AA|getAAFor|AAReturnedFromReturnedValues|AACallSiteReturnedFromReturned|MustBeExecutedContext|AAArgumentFromCallSiteArguments' llvm/lib/Transforms/IPO/Attributor.cpp &>! /tmp/dep_graph_inp
# Filter the first lines until you hit AANoUnwind and the ones after AAMemoryBehaviorCallSite manually, or put two comments in the source like: "// AA HERE"
# Filter the two occurrences of getAAFor in front of AANonNull that are otherwise associated with AANoFree
# Run the Python script like:
# python dep_Graph.py /tmp/dep_graph_inp /tmp/dep_graph.dot
import sys
import re
import os
0) hto_coverage_annotate_1/stats/outputDVpD5Z.json.stats vs hto_coverage_plain_0/stats
1) hto_coverage_plain_0/outputlxxkfx.json.stats vs hto_coverage_fastbuild_2/outputrAYVKh.json.stats
CHANGED: adce NumRemoved 9144 ->
@jdoerfert
jdoerfert / poster_out.ll
Last active October 17, 2019 21:54
./bin/opt -attributor --attributor-disable=false poster.ll -S
; ModuleID = 'poster.ll'
source_filename = "poster.ll"
@G = common global i32 0
; Function Attrs: nofree nosync nounwind willreturn writeonly
define internal i32 @local(i32* nocapture nonnull writeonly align 32 dereferenceable(4) %P) #0 {
store i32 41, i32* %P, align 32
br label %end
// Scoped enumeration with three binding kinds: teams, parallel, and thread.
// NOTE(review): presumably selects the OpenMP-style level a loop binds to;
// confirm against the code that consumes it.
enum class BindKind {
  teams = 0,
  parallel = 1,
  thread = 2
};
// Pointer to a function that takes a single int and returns nothing.
// NOTE(review): presumably the loop-body callback receiving the iteration
// index; confirm against the callers.
using body_t = void (*)(int);
void loop_init_teams(int lb, int ub, body_t body, bool HasInnerLoop) {
if (HasInnerLoop) {