Skip to content

Instantly share code, notes, and snippets.

@maltempi
Created December 22, 2022 18:37
Show Gist options
  • Save maltempi/09c71c3134c9cfa1bf6c057145eda328 to your computer and use it in GitHub Desktop.
Save maltempi/09c71c3134c9cfa1bf6c057145eda328 to your computer and use it in GitHub Desktop.
cusz+ has memory leak?
#include <dirent.h>
#include <iostream>
#include <math.h>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <chrono>
#include <float.h>
#include <limits.h>
#include <math.h>
#include "api.hh"
#include "cli/quality_viewer.hh"
#include "cli/timerecord_viewer.hh"
using Compressor = typename cusz::Framework<float>::LorenzoFeaturedCompressor;
//#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
typedef struct
{
float *h_uncompressed_data;
float *d_uncompressed_data;
float *h_decompressed_data;
float *d_decompressed_data;
uint8_t *h_compressed_data;
uint8_t *d_compressed_data;
double eb;
int device;
size_t uncompressed_len;
size_t compressed_len;
cusz::Header header;
char *mode;
} Data_t;
void compress(Data_t *data, size_t nx, size_t ny, size_t nz, cudaStream_t stream)
{
size_t uncompressed_alloclen = data->uncompressed_len * 1.03;
// Defining cusz stuff
Compressor *compressor = new Compressor;
cusz::TimeRecord timerecord;
cusz::Context *ctx = new cusz::Context();
ctx->set_len(nx, ny, nz, 1).set_eb(data->eb).set_control_string(data->mode);
ctx->device = data->device;
float *d_uncompressed_copy;
cudaMalloc(&d_uncompressed_copy, sizeof(float) * nx * ny * nz);
cudaMemcpy(d_uncompressed_copy, data->d_uncompressed_data, sizeof(float) * nx * ny * nz, cudaMemcpyHostToDevice);
cusz::Context::adjust_eb(ctx, d_uncompressed_copy);
cusz::core_compress(compressor, ctx, // compressor & config
d_uncompressed_copy, uncompressed_alloclen, // input
data->d_compressed_data, data->compressed_len, data->header, // output
stream, &timerecord);
cudaFree(d_uncompressed_copy);
delete compressor;
}
void decompress(Data_t *data, cudaStream_t stream)
{
auto compressor = new Compressor;
cusz::TimeRecord timerecord;
size_t uncompressed_alloclen = data->uncompressed_len * 1.03;
cudaMalloc(&data->d_uncompressed_data, sizeof(float) * uncompressed_alloclen);
cusz::core_decompress(compressor, &data->header,
data->d_compressed_data, // input
data->compressed_len, // input len
data->d_decompressed_data, // output
uncompressed_alloclen, // output len
stream, &timerecord);
// cusz::TimeRecordViewer::view_decompression(&timerecord, len * sizeof(float));
delete compressor;
}
void readInputDataFromFile(string filepath, float *h_array, size_t len)
{
std::ifstream ifs(filepath.c_str(), std::ios::binary | std::ios::in);
if (not ifs.is_open())
{
std::cerr << "fail to open " << filepath << std::endl;
exit(1);
}
ifs.read(reinterpret_cast<char *>(h_array), std::streamsize(len * sizeof(float)));
ifs.close();
}
void exportData(string path, void *h_data, int data_size, size_t len)
{
auto file = fopen(path.c_str(), "wb");
fwrite(h_data, data_size, len, file);
fclose(file);
}
int main(int argc, char *argv[])
{
double eb = 1e-4;
char *mode = "mode=r2r"; // "abs" or "r2r"
string inputFilepath = "../../hurr-CLOUDf48-500x500x100";
size_t nx = 500;
size_t ny = 500;
size_t nz = 100;
bool printReport = true;
fprintf(stderr, "----------CUSZ------------------\n");
fprintf(stderr, "Parameters\n");
fprintf(stderr, "EB: %lf; Mode: %s\n", eb, mode);
fprintf(stderr, "Input file path %s\n", inputFilepath.c_str());
fprintf(stderr, "Dims: (%li, %li, %li)\n", nx, ny, nz);
fprintf(stderr, "--------------------------------\n");
for (int i = 0; i < 10; i++)
{
fprintf(stderr, "Iteration #%i\n", i);
Data_t _data;
Data_t *data = &_data;
cudaStream_t stream;
cudaStreamCreate(&stream);
size_t len = nx * ny * nz;
data->uncompressed_len = len;
data->eb = eb;
data->mode = mode;
cudaMallocHost(&data->h_uncompressed_data, len * sizeof(float));
readInputDataFromFile(inputFilepath, data->h_uncompressed_data, len);
cudaMalloc(&data->d_uncompressed_data, len * sizeof(float));
cudaMemcpy(data->d_uncompressed_data, data->h_uncompressed_data, len * sizeof(float), cudaMemcpyHostToDevice);
chrono::steady_clock::time_point begin;
chrono::steady_clock::time_point end;
begin = std::chrono::steady_clock::now();
compress(data, nx, ny, nz, stream);
end = std::chrono::steady_clock::now();
fprintf(stderr, "Compression spent time %li[µs]\n", chrono::duration_cast<chrono::microseconds>(end - begin).count());
fprintf(stderr, "Starting decompression\n");
begin = std::chrono::steady_clock::now();
cudaMalloc(&data->d_decompressed_data, len * sizeof(float));
decompress(data, stream);
end = std::chrono::steady_clock::now();
fprintf(stderr, "DEcompression spent time %li[µs]\n", chrono::duration_cast<chrono::microseconds>(end - begin).count());
if (printReport)
{
fprintf(stderr, "Report:\n");
cudaFreeHost(&data->h_decompressed_data);
cudaMallocHost(&data->h_decompressed_data, len * sizeof(float));
cudaMemcpy(data->h_decompressed_data, data->d_decompressed_data, len * sizeof(float), cudaMemcpyHostToDevice);
fprintf(stderr, "CPU Metrics:\n");
cusz::QualityViewer::echo_metric_cpu(data->h_decompressed_data, data->h_uncompressed_data, len, size_t(data->compressed_len), false);
}
cudaFreeHost(&data->h_decompressed_data);
cudaFree(data->d_decompressed_data);
cudaFreeHost(&data->h_uncompressed_data);
cudaFree(data->d_uncompressed_data);
cudaFreeHost(&data->h_compressed_data);
cudaFree(data->d_compressed_data);
cudaStreamDestroy(stream);
fprintf(stderr, "\n----------------------------------------\n\n");
}
}
@maltempi
Copy link
Author

memoryfootprint_cusz_r2r_4gpu_10iter-each_device_3

Axis Y: GPU Memory usage (MB)
Axis X: Time (seconds)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment