Compilers final project testing

Balloon Program

  • consume.cu is a simple CUDA program that allocates a chunk of GPU memory, touches it on the device, and then sleeps until stopped. Note: in UVM mode (cudaMallocManaged), it takes a while for the kernel that touches the memory to complete and for all of the data to migrate over to the GPU. A compile-and-run sketch follows below.
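
A minimal way to build and run it (the nvcc invocation and the 4096 MB figure are illustrative assumptions, not part of the original project):

# Build the balloon program (add -arch=... for your GPU if needed)
nvcc -o consume consume.cu
# Allocate 4096 MB via cudaMallocManaged (UVM mode); pass 1 as a second argument for plain cudaMalloc
./consume 4096
# In another terminal, nvidia-smi should show the memory being held until consume is stopped with Ctrl-C
nvidia-smi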

Generating the data

  1. Build, then cd to build/lonestardist
  2. Run the test harness (RUNS is the number of repetitions and CASE is a per-configuration results directory; a consolidated driver sketch appears after this list): for i in $(seq 1 RUNS); do ctest -R gpu | grep run- | grep Passed > ~/CASE/$i.txt; done
  3. Extract the meaningful columns (test number and runtime): cd ~/CASE; for i in $(seq 1 RUNS); do sed -r 's/([[:digit:]]+).+Passed (.+) sec/\1 \2/' $i.txt > col_$i.txt; done
  4. Average the runs together and sort by test number: cd ~/CASE; cat col_*.txt | awk -f ~/avg.awk | sort -n > averaged.txt
  5. See the report by test number (comparing two cases): pr -mt ~/CASE1/averaged.txt ~/CASE2/averaged.txt | awk -f compare.awk
  6. Relabel from test ID to test type (algorithm), average, and sort again: cd ~/CASE; awk -f ~/relabel.awk averaged.txt | awk -f ~/avg.awk | sort > byalgo.txt
  7. See the report by algorithm: pr -mt ~/CASE1/byalgo.txt ~/CASE2/byalgo.txt | awk -f compare.awk
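
The per-case steps above (harness runs, column extraction, averaging, relabeling) can be wrapped in one small driver script. This is only a sketch under the same assumptions as the list: RUNS=10 is an arbitrary placeholder, CASE is the per-configuration results directory, the script is run from the repository root after building, and avg.awk / relabel.awk live in the home directory as in the commands above.

#!/bin/bash
# Hypothetical driver for steps 2-6 above; adjust RUNS and CASE as needed.
RUNS=10        # placeholder: number of repetitions
CASE=~/CASE    # placeholder: results directory for this configuration
mkdir -p "$CASE"

cd build/lonestardist
for i in $(seq 1 "$RUNS"); do
    ctest -R gpu | grep run- | grep Passed > "$CASE/$i.txt"
done

cd "$CASE"
# Keep only the test number and the runtime reported by ctest
for i in $(seq 1 "$RUNS"); do
    sed -r 's/([[:digit:]]+).+Passed (.+) sec/\1 \2/' "$i.txt" > "col_$i.txt"
done

# Average across runs per test number, then relabel and re-average per algorithm
cat col_*.txt | awk -f ~/avg.awk | sort -n > averaged.txt
awk -f ~/relabel.awk averaged.txt | awk -f ~/avg.awk | sort > byalgo.txt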

# avg.awk: average the second column grouped by the key in the first column
# (e.g. test number -> mean runtime across runs).
{
    freqs[$1] += $2;   # running sum of values for this key
    count[$1] += 1;    # number of samples for this key
}
END {
    for (test in freqs) {
        avg = freqs[test] / count[test];
        printf "%s %.2f\n", test, avg;
    }
}

# compare.awk: compare two averaged reports merged side by side with `pr -mt`
# (fields: key1 avg1 key2 avg2). Prints the percentage change of the second
# case ($4) relative to the first ($2); a negative value means the second
# case is faster (improved).
{
    diff = $4 - $2;
    percentage = (diff / $2) * 100;
    print $1, percentage;
    tot_percentage += percentage;
    count++;
    if (diff <= 0) {
        improved++;
    } else {
        regressed++;
    }
}
END {
    print "Improved:", improved;
    print "Regressed:", regressed;
    print "Average Percentage:", tot_percentage / count;
}

// consume.cu: "balloon" program that allocates GPU memory, touches it once on
// the device, and then sleeps until interrupted with SIGINT.
#include <atomic>
#include <chrono>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <thread>

using std::printf;
using std::atol;
using namespace std::chrono_literals;

static volatile std::sig_atomic_t stop;

// Simple kernel that reads every float in the allocation so that, under UVM,
// all of the pages are migrated to the GPU.
__global__ void touch(size_t sz, float *ptr) {
    float acc = 0;
    for (size_t i = 0; i < sz / sizeof(float); i++) {
        acc += ptr[i];
    }
    ptr[0] = acc;
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        printf("%s [MB to allocate] <0/absent = cudaMallocManaged, 1 = cudaMalloc>\n", argv[0]);
        return 1;
    }

    // A capturing lambda cannot be converted to the function pointer that
    // signal() expects, so use a capture-less one.
    std::signal(SIGINT, [](int) { stop = 1; });

    size_t sz = static_cast<size_t>(atol(argv[1])) * 1024 * 1024;
    auto md = argc > 2 ? atol(argv[2]) : 0;
    printf("Allocating %zu bytes using %s\n", sz, md == 0 ? "cudaMallocManaged" : "cudaMalloc");

    float *ptr = nullptr;
    cudaError_t r = md == 0 ? cudaMallocManaged(&ptr, sz) : cudaMalloc(&ptr, sz);
    if (r != cudaSuccess) {
        printf("Error allocating\n");
        return 1;
    }

    // Simple kernel to touch all the memory GPU-side.
    touch<<<1, 1>>>(sz, ptr);
    r = cudaDeviceSynchronize();
    if (r != cudaSuccess) {
        printf("Error launching and synchronizing\n");
        stop = 1;
    }

    // Hold on to the memory until Ctrl-C.
    while (!stop) {
        std::this_thread::sleep_for(1s);
    }

    cudaFree(ptr);
}

# relabel.awk: map the ctest test number ($1) to the algorithm that test
# belongs to, preserving the averaged runtime ($2). The numeric boundaries
# correspond to the ordering of tests in the gpu test suite.
{
    if ($1 < 25) {
        print "bc", $2
    } else if ($1 < 73) {
        print "bfs_push", $2
    } else if ($1 < 121) {
        print "bfs_pull", $2
    } else if ($1 < 169) {
        print "cc_push", $2
    } else if ($1 < 217) {
        print "cc_pull", $2
    } else if ($1 < 241) {
        print "kcore_push", $2
    } else if ($1 < 265) {
        print "kcore_pull", $2
    } else if ($1 < 313) {
        print "pagerank_pull", $2
    } else if ($1 < 361) {
        print "pagerank_push", $2
    } else if ($1 < 409) {
        print "sssp_push", $2
    } else if ($1 < 456) {
        print "sssp_pull", $2
    } else {
        print "ERROR", $2
    }
}