Skip to content

Instantly share code, notes, and snippets.

@maltempi
Last active November 5, 2022 23:15
Show Gist options
  • Save maltempi/0d35fc74b1521eb963fb19c0fb2c1da6 to your computer and use it in GitHub Desktop.
Save maltempi/0d35fc74b1521eb963fb19c0fb2c1da6 to your computer and use it in GitHub Desktop.
Standalone CUSZ
# Standalone build of the cuSZ C-API example (src/main.cu).
# The include path and the shared libraries below point at a cuSZ
# installation under /opt/cusz; adjust the paths if it lives elsewhere.
cmake_minimum_required(VERSION 3.18)

# Listing CUDA here already enables the language, so no separate
# enable_language(CUDA) call is required.
project(cusz_example LANGUAGES CXX CUDA)

add_executable(ex-api src/main.cu)

# Scope the header search path to the one target that needs it.
target_include_directories(ex-api PRIVATE /opt/cusz/include/cusz)

# nvcc options needed by main.cu:
#   --extended-lambda           main.cu passes a __device__ __host__ lambda to thrust
#   --expt-relaxed-constexpr    allow constexpr host functions in device code
#   -Wno-deprecated-declarations  silence deprecation warnings
target_compile_options(ex-api PRIVATE
    $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
    $<$<COMPILE_LANGUAGE:CUDA>:-Wno-deprecated-declarations>
)

target_link_libraries(ex-api
    PRIVATE
    -lcuda
    -lcusparse
    /opt/cusz/lib/libcusz.so
    /opt/cusz/lib/libcuszargp.so
    /opt/cusz/lib/libcuszcomp.so
    /opt/cusz/lib/libcuszhuff.so
    /opt/cusz/lib/libcuszkernel.so
    /opt/cusz/lib/libcuszpq.so
    /opt/cusz/lib/libcuszspm.so
    /opt/cusz/lib/libcuszspv.so
)
/**
* Note (maltempi): this file is src/main.cu of the standalone example.
* Original source code is available at https://github.com/szcompressor/cuSZ/blob/1dd0b8ae265daf88be2a6be1c4152e4bf83a0a99/example/src/capi.cu
*
* @file capi.cu
* @author Jiannan Tian
* @brief
* @version 0.3
* @date 2022-05-06
*
* (C) 2022 by Washington State University, Argonne National Laboratory
*/
#include <cstdio>
#include <cstdlib>

#include "cusz.h"
#include "cuszapi.hh"
#include "cli/quality_viewer.hh"
#include "cli/timerecord_viewer.hh"
/**
 * Demo driver for the cuSZ C API: compress, decompress and evaluate one field.
 *
 * Loads 3600x1800 elements of type T from `fname` into pinned host memory,
 * copies them to the device, runs cusz_compress / cusz_decompress on a
 * dedicated stream, prints timing and header information, and finally reports
 * quality metrics of the decompressed data against the original.
 *
 * Any CUDA runtime call that fails prints a diagnostic to stderr and
 * terminates the process with EXIT_FAILURE.
 *
 * @tparam T     element type of the field; the compressor is created with
 *               FP32 and main() instantiates f<float>.
 * @param fname  path of the raw binary input file.
 */
template <typename T>
void f(std::string fname)
{
    /* For demo, we use 3600x1800 CESM data. */
    auto len = 3600 * 1800;

    cusz_header header;
    uint8_t*    exposed_compressed = nullptr;  // filled in by cusz_compress
    uint8_t*    compressed         = nullptr;  // user-owned copy of the archive
    size_t      compressed_len     = 0;

    T *d_uncompressed = nullptr, *h_uncompressed = nullptr;
    T *d_decompressed = nullptr, *h_decompressed = nullptr;

    /* cuSZ requires a 3% overhead on device (not required on host). */
    size_t uncompressed_memlen = len * 1.03;
    size_t decompressed_memlen = uncompressed_memlen;

    /* Abort with a readable message as soon as a CUDA runtime call fails. */
    auto check = [](cudaError_t status, const char* what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "CUDA failure in %s: %s\n", what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    /* code snippet for looking at the device array easily */
    auto peek_devdata = [](T* d_arr, size_t num = 20) {
        thrust::for_each(thrust::device, d_arr, d_arr + num, [=] __device__ __host__(const T i) { printf("%f\t", i); });
        printf("\n");
    };

    // clang-format off
    check(cudaMalloc(     &d_uncompressed, sizeof(T) * uncompressed_memlen ), "cudaMalloc(d_uncompressed)");
    check(cudaMallocHost( &h_uncompressed, sizeof(T) * len ),                 "cudaMallocHost(h_uncompressed)");
    check(cudaMalloc(     &d_decompressed, sizeof(T) * decompressed_memlen ), "cudaMalloc(d_decompressed)");
    check(cudaMallocHost( &h_decompressed, sizeof(T) * len ),                 "cudaMallocHost(h_decompressed)");
    // clang-format on

    /* User handles loading from filesystem & transferring to device. */
    io::read_binary_to_array(fname, h_uncompressed, len);
    check(
        cudaMemcpy(d_uncompressed, h_uncompressed, sizeof(T) * len, cudaMemcpyHostToDevice),
        "cudaMemcpy(host -> d_uncompressed)");

    /* a casual peek */
    printf("peeking uncompressed data, 20 elements\n");
    peek_devdata(d_uncompressed, 20);

    cudaStream_t stream;
    check(cudaStreamCreate(&stream), "cudaStreamCreate");

    // using default
    // cusz_framework* framework = cusz_default_framework();
    // alternatively
    cusz_framework fw = cusz_framework{
        .pipeline     = Auto,
        .predictor    = cusz_custom_predictor{.type = LorenzoI},
        .quantization = cusz_custom_quantization{.radius = 512},
        .codec        = cusz_custom_codec{.type = Huffman}};
    cusz_framework* framework = &fw;
    // Brace initializing a struct pointer (`new cusz_framework{...}`) is not
    // supported by all host compilers when nvcc forwards, hence the stack object.

    cusz_compressor* comp = cusz_create(framework, FP32);

    // Stack-allocated like `fw` above, so nothing has to be deleted afterwards.
    cusz_config  cfg    = cusz_config{.eb = 2.4e-4, .mode = Rel};
    cusz_config* config = &cfg;

    cusz_len uncomp_len = cusz_len{3600, 1800, 1, 1, 1.03};
    cusz_len decomp_len = uncomp_len;

    cusz::TimeRecord compress_timerecord;
    cusz::TimeRecord decompress_timerecord;

    {
        cusz_compress(
            comp, config, d_uncompressed, uncomp_len, &exposed_compressed, &compressed_len, &header,
            (void*)&compress_timerecord, stream);

        /* User can interpret the collected time information in other ways. */
        cusz::TimeRecordViewer::view_compression(&compress_timerecord, len * sizeof(T), compressed_len);

        /* verify header */
        // %p with a void* argument is the only portable way to print an address.
        printf("header.%-*s : %p\n", 12, "(addr)", (void*)&header);
        // NOTE(review): %lu assumes header.x/y/z and get_filesize() are unsigned long — confirm against cusz.h.
        printf("header.%-*s : %lu, %lu, %lu\n", 12, "{x,y,z}", header.x, header.y, header.z);
        printf("header.%-*s : %lu\n", 12, "filesize", ConfigHelper::get_filesize(&header));
    }

    /* If needed, User should perform a memcopy to transfer `exposed_compressed` before `compressor` is destroyed. */
    check(cudaMalloc(&compressed, compressed_len), "cudaMalloc(compressed)");
    check(
        cudaMemcpy(compressed, exposed_compressed, compressed_len, cudaMemcpyDeviceToDevice),
        "cudaMemcpy(exposed_compressed -> compressed)");

    {
        // NOTE(review): decompression reads `exposed_compressed`, not the copy made above;
        // this works because `comp` is still alive at this point.
        cusz_decompress(
            comp, &header, exposed_compressed, compressed_len, d_decompressed, decomp_len,
            (void*)&decompress_timerecord, stream);

        cusz::TimeRecordViewer::view_decompression(&decompress_timerecord, len * sizeof(T));
    }

    /* a casual peek */
    printf("peeking decompressed data, 20 elements\n");
    peek_devdata(d_decompressed, 20);

    /* demo: offline checking (de)compression quality. */
    /* load data again */
    check(
        cudaMemcpy(d_uncompressed, h_uncompressed, sizeof(T) * len, cudaMemcpyHostToDevice),
        "cudaMemcpy(reload d_uncompressed)");
    /* perform evaluation */
    cusz::QualityViewer::echo_metric_gpu(d_decompressed, d_uncompressed, len, compressed_len);

    /* Tear down: compressor first, then every buffer this function allocated. */
    cusz_release(comp);

    check(cudaFree(compressed), "cudaFree(compressed)");
    check(cudaFree(d_uncompressed), "cudaFree(d_uncompressed)");
    check(cudaFree(d_decompressed), "cudaFree(d_decompressed)");
    check(cudaFreeHost(h_uncompressed), "cudaFreeHost(h_uncompressed)");
    check(cudaFreeHost(h_decompressed), "cudaFreeHost(h_decompressed)");

    check(cudaStreamDestroy(stream), "cudaStreamDestroy");
}
/**
 * Entry point: expects the path to a 3600x1800 CESM field as the first argument
 * and runs the float demo on it.
 *
 * @return 0 after the demo call returns, 1 when the input path is missing.
 */
int main(int argc, char** argv)
{
    if (argc < 2) {
        printf("PROG /path/to/cesm-3600x1800\n");
        // A missing argument is a usage error; report it with a non-zero status.
        return 1;
    }

    f<float>(std::string(argv[1]));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment