Related to: szcompressor/cuSZ#71
Last active
November 5, 2022 23:15
-
-
Save maltempi/0d35fc74b1521eb963fb19c0fb2c1da6 to your computer and use it in GitHub Desktop.
Standalone CUSZ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standalone build of the cuSZ C-API example (src/main.cu) against an
# already-installed copy of cuSZ.
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)

# Listing CUDA here already enables the language; no enable_language() needed.
project(cusz_example LANGUAGES CXX CUDA)

# Prefix of the cuSZ installation. Defaults to the location this script
# previously hard-coded; override with: cmake -DCUSZ_PREFIX=/path/to/cusz ..
set(CUSZ_PREFIX "/opt/cusz" CACHE PATH "Prefix of the cuSZ installation")

# Provides the CUDA::cuda_driver and CUDA::cusparse imported targets, which
# replace the raw -lcuda / -lcusparse linker flags.
find_package(CUDAToolkit REQUIRED)

# src/main.cu uses a __device__ __host__ lambda, which nvcc only accepts with
# --extended-lambda.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr -Wno-deprecated-declarations")

add_executable(ex-api src/main.cu)

# Scope the include path to the target instead of the whole directory.
target_include_directories(ex-api
  PRIVATE
    "${CUSZ_PREFIX}/include/cusz"
)

target_link_libraries(ex-api
  PRIVATE
    CUDA::cuda_driver
    CUDA::cusparse
    "${CUSZ_PREFIX}/lib/libcusz.so"
    "${CUSZ_PREFIX}/lib/libcuszargp.so"
    "${CUSZ_PREFIX}/lib/libcuszcomp.so"
    "${CUSZ_PREFIX}/lib/libcuszhuff.so"
    "${CUSZ_PREFIX}/lib/libcuszkernel.so"
    "${CUSZ_PREFIX}/lib/libcuszpq.so"
    "${CUSZ_PREFIX}/lib/libcuszspm.so"
    "${CUSZ_PREFIX}/lib/libcuszspv.so"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* // maltempi comments: | |
* NOTE: src/main.cu | |
* Original source code available on https://github.com/szcompressor/cuSZ/blob/1dd0b8ae265daf88be2a6be1c4152e4bf83a0a99/example/src/capi.cu | |
* | |
* @file capi.cu | |
* @author Jiannan Tian | |
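* @brief Demo of the cuSZ C API: compress a 3600x1800 field on the GPU, decompress it, and report timing and quality metrics.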
* @version 0.3 | |
* @date 2022-05-06 | |
* | |
* (C) 2022 by Washington State University, Argonne National Laboratory | |
*/ | |
#include "cusz.h" | |
#include "cuszapi.hh" | |
#include "cli/quality_viewer.hh" | |
#include "cli/timerecord_viewer.hh" | |
template <typename T> | |
void f(std::string fname) | |
{ | |
/* For demo, we use 3600x1800 CESM data. */ | |
auto len = 3600 * 1800; | |
cusz_header header; | |
uint8_t* exposed_compressed; | |
uint8_t* compressed; | |
size_t compressed_len; | |
T *d_uncompressed, *h_uncompressed; | |
T *d_decompressed, *h_decompressed; | |
/* cuSZ requires a 3% overhead on device (not required on host). */ | |
size_t uncompressed_memlen = len * 1.03; | |
size_t decompressed_memlen = uncompressed_memlen; | |
/* code snippet for looking at the device array easily */ | |
auto peek_devdata = [](T* d_arr, size_t num = 20) { | |
thrust::for_each(thrust::device, d_arr, d_arr + num, [=] __device__ __host__(const T i) { printf("%f\t", i); }); | |
printf("\n"); | |
}; | |
// clang-format off | |
cudaMalloc( &d_uncompressed, sizeof(T) * uncompressed_memlen ); | |
cudaMallocHost( &h_uncompressed, sizeof(T) * len ); | |
cudaMalloc( &d_decompressed, sizeof(T) * decompressed_memlen ); | |
cudaMallocHost( &h_decompressed, sizeof(T) * len ); | |
// clang-format on | |
/* User handles loading from filesystem & transferring to device. */ | |
io::read_binary_to_array(fname, h_uncompressed, len); | |
cudaMemcpy(d_uncompressed, h_uncompressed, sizeof(T) * len, cudaMemcpyHostToDevice); | |
/* a casual peek */ | |
printf("peeking uncompressed data, 20 elements\n"); | |
peek_devdata(d_uncompressed, 20); | |
cudaStream_t stream; | |
cudaStreamCreate(&stream); | |
// using default | |
// cusz_framework* framework = cusz_default_framework(); | |
// alternatively | |
cusz_framework fw = cusz_framework{ | |
.pipeline = Auto, | |
.predictor = cusz_custom_predictor{.type = LorenzoI}, | |
.quantization = cusz_custom_quantization{.radius = 512}, | |
.codec = cusz_custom_codec{.type = Huffman}}; | |
cusz_framework* framework = &fw; | |
// Brace initializing a struct pointer is not supported by all host compilers | |
// when nvcc forwards. | |
// cusz_framework* framework = new cusz_framework{ | |
// .pipeline = Auto, | |
// .predictor = cusz_custom_predictor{.type = LorenzoI}, | |
// .quantization = cusz_custom_quantization{.radius = 512}, | |
// .codec = cusz_custom_codec{.type = Huffman}}; | |
cusz_compressor* comp = cusz_create(framework, FP32); | |
cusz_config* config = new cusz_config{.eb = 2.4e-4, .mode = Rel}; | |
cusz_len uncomp_len = cusz_len{3600, 1800, 1, 1, 1.03}; | |
cusz_len decomp_len = uncomp_len; | |
cusz::TimeRecord compress_timerecord; | |
cusz::TimeRecord decompress_timerecord; | |
{ | |
cusz_compress( | |
comp, config, d_uncompressed, uncomp_len, &exposed_compressed, &compressed_len, &header, | |
(void*)&compress_timerecord, stream); | |
/* User can interpret the collected time information in other ways. */ | |
cusz::TimeRecordViewer::view_compression(&compress_timerecord, len * sizeof(T), compressed_len); | |
/* verify header */ | |
printf("header.%-*s : %x\n", 12, "(addr)", &header); | |
printf("header.%-*s : %lu, %lu, %lu\n", 12, "{x,y,z}", header.x, header.y, header.z); | |
printf("header.%-*s : %lu\n", 12, "filesize", ConfigHelper::get_filesize(&header)); | |
} | |
/* If needed, User should perform a memcopy to transfer `exposed_compressed` before `compressor` is destroyed. */ | |
cudaMalloc(&compressed, compressed_len); | |
cudaMemcpy(compressed, exposed_compressed, compressed_len, cudaMemcpyDeviceToDevice); | |
{ | |
cusz_decompress( | |
comp, &header, exposed_compressed, compressed_len, d_decompressed, decomp_len, | |
(void*)&decompress_timerecord, stream); | |
cusz::TimeRecordViewer::view_decompression(&decompress_timerecord, len * sizeof(T)); | |
} | |
/* a casual peek */ | |
printf("peeking decompressed data, 20 elements\n"); | |
peek_devdata(d_decompressed, 20); | |
/* demo: offline checking (de)compression quality. */ | |
/* load data again */ cudaMemcpy(d_uncompressed, h_uncompressed, sizeof(T) * len, cudaMemcpyHostToDevice); | |
/* perform evaluation */ cusz::QualityViewer::echo_metric_gpu(d_decompressed, d_uncompressed, len, compressed_len); | |
cusz_release(comp); | |
cudaFree(compressed); | |
// delete compressor; | |
cudaStreamDestroy(stream); | |
} | |
int main(int argc, char** argv) | |
{ | |
if (argc < 2) { | |
printf("PROG /path/to/cesm-3600x1800\n"); | |
exit(0); | |
} | |
f<float>(std::string(argv[1])); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment