This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; https://alive2.llvm.org/ce/z/bKDTJd | |
; https://godbolt.org/z/v1qYrb5Ka | |
; @src: presumably the "source" side of the Alive2 comparison linked above
; (the matching @tgt definition is not fully visible here).
; Multiplies the argument by 0.0, multiplies that product by 3.0, and returns
; the second product. Neither fmul carries fast-math flags.
define float @src(float %0) {
entry:
  %1 = fmul float %0, 0.000000e+00    ; %0 * 0.0
  %2 = fmul float %1, 3.000000e+00    ; (%0 * 0.0) * 3.0
  ret float %2
}
define float @tgt(float %0) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile every out_*.c in this directory to an assembly listing (out_*.S).
# Usage: `make` or `make asm`; CFLAGS may be overridden on the command line.

# Flags passed to gcc when producing the listings.
CFLAGS = -O3 -march=native

# Inputs are discovered with make's own globbing: unlike `$(shell ls ...)`,
# $(wildcard) spawns no subprocess and silently expands to nothing when no
# file matches.
SRCS = $(wildcard out_*.c)
OPT_PROGS = $(patsubst %.c,%.S,$(SRCS))

# `asm` names an action, not a file; declaring it phony keeps a stray file
# called "asm" from masking the target.
.PHONY: asm
asm: $(OPT_PROGS)

# Pattern rule: foo.c -> foo.S ($@ is the target, $< the first prerequisite).
# The recipe line must begin with a tab character.
%.S : %.c
	gcc $(CFLAGS) -S -o $@ $<
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include<algorithm> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include <cooperative_groups.h> | |
namespace cg = cooperative_groups; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include<algorithm> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include <cooperative_groups.h> | |
namespace cg = cooperative_groups; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
# Total workload, in bytes.
total_workload = 1024
# Size, in bytes, of the shared memory in the system.
buffersize = 256
# Loading one byte costs 100 units of time.
dma_cost = 100
# Computing one byte costs 100 units of time.
compute_cost = 100
# 1 output tile requires 2 input tiles | |
for tilesize in range(1, 256): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
***directRecvCopySend (baseline)*** | |
# size count type time algbw busbw error time algbw busbw error | |
# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) | |
1024 32 float 38.90 0.03 0.02 0e+00 39.19 0.03 0.02 0e+00 | |
2048 64 float 39.33 0.05 0.05 0e+00 38.80 0.05 0.05 0e+00 | |
4096 128 float 39.25 0.10 0.09 0e+00 41.12 0.10 0.09 0e+00 | |
8192 256 float 39.28 0.21 0.18 0e+00 39.32 0.21 0.18 0e+00 | |
16384 512 float 40.68 0.40 0.35 0e+00 40.56 0.40 0.35 0e+00 | |
32768 1024 float 42.74 0.77 0.67 0e+00 42.36 0.77 0.68 0e+00 | |
65536 2048 float 45.60 1.44 1.26 0e+00 45.39 1.44 1.26 0e+00 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
case 0: { | |
for (int step = 0 ; step < NUM_STEPS; step ++) { | |
size_t num_srcs = srcoffset_0.c15[step].length; | |
if (threadIdx.x == 0) { | |
bool l; | |
do { | |
l = false; | |
for (size_t t = 0 ; t < num_srcs; t ++) { | |
l ||= (wait_0.b15[step].b[t] && (condvar[srcoffset_0.c15[step].c[t]].x != 1)); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define NUM_STEPS 15 | |
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include "cuda_runtime_api.h" | |
#define CUDACHECK(cmd) do { \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include<algorithm> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include <cooperative_groups.h> | |
namespace cg = cooperative_groups; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define NUM_STEPS 15 | |
#include<stdio.h> | |
#include<assert.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdint.h> | |
#include "mpi.h" | |
#include "accl_util.h" | |
#include "cuda_runtime_api.h" | |
#define CUDACHECK(cmd) do { \ |
NewerOlder