Compilers final project testing

Balloon Program

  • consume.cu is a simple CUDA program that allocates a chunk of GPU memory, touches it on the device, and then sleeps until stopped. Note: in UVM mode (cudaMallocManaged), it takes a while for the kernel that touches the memory to complete and for all of the data to migrate over to the GPU. A compile-and-run sketch follows below.
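
A minimal way to build and run it (the nvcc invocation and the 4096 MB figure are illustrative assumptions, not part of the original project):

# Build the balloon program (add -arch=... for your GPU if needed)
nvcc -o consume consume.cu
# Allocate 4096 MB via cudaMallocManaged (UVM mode); pass 1 as a second argument for plain cudaMalloc
./consume 4096
# In another terminal, nvidia-smi should show the memory being held until consume is stopped with Ctrl-C
nvidia-smi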

Generating the data

  1. Build, then cd to build/lonestardist
  2. Run the test harness (RUNS is the number of repetitions and CASE is a per-configuration results directory; a consolidated driver sketch appears after this list): for i in $(seq 1 RUNS); do ctest -R gpu | grep run- | grep Passed > ~/CASE/$i.txt; done
  3. Extract the meaningful columns (test number and runtime): cd ~/CASE; for i in $(seq 1 RUNS); do sed -r 's/([[:digit:]]+).+Passed (.+) sec/\1 \2/' $i.txt > col_$i.txt; done
  4. Average the runs together and sort by test number: cd ~/CASE; cat col_*.txt | awk -f ~/avg.awk | sort -n > averaged.txt
  5. See the report by test number (comparing two cases): pr -mt ~/CASE1/averaged.txt ~/CASE2/averaged.txt | awk -f compare.awk
  6. Relabel from test ID to test type (algorithm), average, and sort again: cd ~/CASE; awk -f ~/relabel.awk averaged.txt | awk -f ~/avg.awk | sort > byalgo.txt
  7. See the report by algorithm: pr -mt ~/CASE1/byalgo.txt ~/CASE2/byalgo.txt | awk -f compare.awk
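
The per-case steps above (harness runs, column extraction, averaging, relabeling) can be wrapped in one small driver script. This is only a sketch under the same assumptions as the list: RUNS=10 is an arbitrary placeholder, CASE is the per-configuration results directory, the script is run from the repository root after building, and avg.awk / relabel.awk live in the home directory as in the commands above.

#!/bin/bash
# Hypothetical driver for steps 2-6 above; adjust RUNS and CASE as needed.
RUNS=10        # placeholder: number of repetitions
CASE=~/CASE    # placeholder: results directory for this configuration
mkdir -p "$CASE"

cd build/lonestardist
for i in $(seq 1 "$RUNS"); do
    ctest -R gpu | grep run- | grep Passed > "$CASE/$i.txt"
done

cd "$CASE"
# Keep only the test number and the runtime reported by ctest
for i in $(seq 1 "$RUNS"); do
    sed -r 's/([[:digit:]]+).+Passed (.+) sec/\1 \2/' "$i.txt" > "col_$i.txt"
done

# Average across runs per test number, then relabel and re-average per algorithm
cat col_*.txt | awk -f ~/avg.awk | sort -n > averaged.txt
awk -f ~/relabel.awk averaged.txt | awk -f ~/avg.awk | sort > byalgo.txt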

# avg.awk: average the second column grouped by the key in the first column
# (e.g. test number -> mean runtime across runs).
{
    freqs[$1] += $2;   # running sum of values for this key
    count[$1] += 1;    # number of samples for this key
}
END {
    for (test in freqs) {
        avg = freqs[test] / count[test];
        printf "%s %.2f\n", test, avg;
    }
}

# compare.awk: compare two averaged reports merged side by side with `pr -mt`
# (fields: key1 avg1 key2 avg2). Prints the percentage change of the second
# case ($4) relative to the first ($2); a negative value means the second
# case is faster (improved).
{
    diff = $4 - $2;
    percentage = (diff / $2) * 100;
    print $1, percentage;
    tot_percentage += percentage;
    count++;
    if (diff <= 0) {
        improved++;
    } else {
        regressed++;
    }
}
END {
    print "Improved:", improved;
    print "Regressed:", regressed;
    print "Average Percentage:", tot_percentage / count;
}

// consume.cu: "balloon" program that allocates GPU memory, touches it once on
// the device, and then sleeps until interrupted with SIGINT.
#include <atomic>
#include <chrono>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <thread>

using std::printf;
using std::atol;
using namespace std::chrono_literals;

static volatile std::sig_atomic_t stop;

// Simple kernel that reads every float in the allocation so that, under UVM,
// all of the pages are migrated to the GPU.
__global__ void touch(size_t sz, float *ptr) {
    float acc = 0;
    for (size_t i = 0; i < sz / sizeof(float); i++) {
        acc += ptr[i];
    }
    ptr[0] = acc;
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        printf("%s [MB to allocate] <0/absent = cudaMallocManaged, 1 = cudaMalloc>\n", argv[0]);
        return 1;
    }

    // A capturing lambda cannot be converted to the function pointer that
    // signal() expects, so use a capture-less one.
    std::signal(SIGINT, [](int) { stop = 1; });

    size_t sz = static_cast<size_t>(atol(argv[1])) * 1024 * 1024;
    auto md = argc > 2 ? atol(argv[2]) : 0;
    printf("Allocating %zu bytes using %s\n", sz, md == 0 ? "cudaMallocManaged" : "cudaMalloc");

    float *ptr = nullptr;
    cudaError_t r = md == 0 ? cudaMallocManaged(&ptr, sz) : cudaMalloc(&ptr, sz);
    if (r != cudaSuccess) {
        printf("Error allocating\n");
        return 1;
    }

    // Simple kernel to touch all the memory GPU-side.
    touch<<<1, 1>>>(sz, ptr);
    r = cudaDeviceSynchronize();
    if (r != cudaSuccess) {
        printf("Error launching and synchronizing\n");
        stop = 1;
    }

    // Hold on to the memory until Ctrl-C.
    while (!stop) {
        std::this_thread::sleep_for(1s);
    }

    cudaFree(ptr);
}

# relabel.awk: map the ctest test number ($1) to the algorithm that test
# belongs to, preserving the averaged runtime ($2). The numeric boundaries
# correspond to the ordering of tests in the gpu test suite.
{
    if ($1 < 25) {
        print "bc", $2
    } else if ($1 < 73) {
        print "bfs_push", $2
    } else if ($1 < 121) {
        print "bfs_pull", $2
    } else if ($1 < 169) {
        print "cc_push", $2
    } else if ($1 < 217) {
        print "cc_pull", $2
    } else if ($1 < 241) {
        print "kcore_push", $2
    } else if ($1 < 265) {
        print "kcore_pull", $2
    } else if ($1 < 313) {
        print "pagerank_pull", $2
    } else if ($1 < 361) {
        print "pagerank_push", $2
    } else if ($1 < 409) {
        print "sssp_push", $2
    } else if ($1 < 456) {
        print "sssp_pull", $2
    } else {
        print "ERROR", $2
    }
}