Last active
February 10, 2021 14:21
-
-
Save data-panda/1f164852fbdadc7364f5cdf8a82ab065 to your computer and use it in GitHub Desktop.
Zero bug reproducer for cuda zfp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* minimal code example showing how to call the zfp (de)compressor */ | |
#include <math.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <zfp.h> | |
/* fill grid with a floating point value */ | |
static inline void | |
fillGridWithVal(float *array, int nx, int ny, int nz, float val) | |
{ | |
#pragma omp parallel for | |
for (int k = 0; k < nz; k++) | |
for (int j = 0; j < ny; j++) | |
for (int i = 0; i < nx; i++) { | |
array[ i + nx * (j + ny * k) ] = val; | |
} | |
} | |
/* fill the domain with cosine wave */ | |
/* Fill the 3d domain with cosine wave */ | |
static inline float | |
input_func_cos(int i, int j, int k, int nx, int ny, int nz) | |
{ | |
float delta = 1.; | |
float x = i * delta; | |
float y = j * delta; | |
float z = k * delta; | |
// source of seismic wave at top | |
float xs = (nx * delta)/2.; | |
float ys = (ny * delta)/2.; | |
float zs = 0; | |
float wl = 348.23; | |
float amp = 25000; | |
x -= xs; | |
y -= ys; | |
z -= zs; | |
float r = sqrtf( x * x + y * y + z * z); | |
float val = amp * fminf(1., 1./r) * cos(r / wl * 2.* M_PI); | |
// printf("%e, %e, %e\n",r, fminf(1., 1./r), cos(r / wl * 2.* M_PI)); | |
return val; | |
} | |
/* array visualization functions */ | |
static void | |
force_big_endian(unsigned char *bytes) | |
{ | |
static int doneTest = 0; | |
static int shouldSwap = 0; | |
if(!doneTest) | |
{ | |
int tmp1 = 1; | |
unsigned char *tmp2 = (unsigned char *) &tmp1; | |
if (*tmp2 != 0) | |
shouldSwap = 1; | |
doneTest = 1; | |
} | |
if (shouldSwap) | |
{ | |
unsigned char tmp = bytes[0]; | |
bytes[0] = bytes[3]; | |
bytes[3] = tmp; | |
tmp = bytes[1]; | |
bytes[1] = bytes[2]; | |
bytes[2] = tmp; | |
} | |
} | |
void | |
writeStructuredPoints(int nx, int ny, int nz, float *data, int binary, const char *filename) | |
{ | |
long int nt = nx * ny * nz; | |
char full_filename[1024]; | |
if (strstr(filename, ".vtk") != NULL) | |
{ | |
strcpy(full_filename, filename); | |
} | |
else | |
{ | |
sprintf(full_filename, "%s.vtk", filename); | |
} | |
FILE *fp = fopen(full_filename, "w+"); | |
/* write header */ | |
fprintf(fp, "# vtk DataFile Version 2.0\n"); | |
fprintf(fp, "Volume data from array\n"); | |
if (binary) fprintf(fp, "BINARY\n"); | |
else fprintf(fp, "ASCII\n"); | |
fprintf(fp, "DATASET STRUCTURED_POINTS\n"); | |
fprintf(fp, "DIMENSIONS %d %d %d\n", nx, ny, nz); | |
fprintf(fp, "SPACING %f %f %f\n", 1., 1., 1.); | |
fprintf(fp, "ORIGIN %f %f %f\n", 0., 0., 0.); | |
fprintf(fp, "POINT_DATA %ld\n", nt); | |
fprintf(fp, "SCALARS value float 1\n"); //hardcode variable name as value | |
fprintf(fp, "LOOKUP_TABLE default\n"); | |
/* write the values */ | |
if(binary) // ignore big endian and little endian check | |
{ | |
for(int i = 0; i < nt; i++) | |
{ | |
float val = data[i]; | |
force_big_endian((unsigned char *) &val); | |
fwrite(&val, sizeof(float), 1, fp); | |
} | |
fprintf(fp, "\n"); | |
} | |
else | |
{ | |
int numInColumn = 0; | |
int values = 0; | |
for(long int i = 0; i < nt; i++) | |
{ | |
fprintf(fp, "%f\n", data[i]); values++; | |
} | |
printf("array dim is %d and value written is %d\n", nt, values); | |
} | |
fclose(fp); | |
} | |
/* compress or decompress array */ | |
static int | |
compress(float* array, int nx, int ny, int nz, int bpv, zfp_bool decompress, size_t *cmp_size) | |
{ | |
int status = 0; /* return value: 0 = success */ | |
zfp_type type; /* array scalar type */ | |
zfp_field* field; /* array meta data */ | |
zfp_stream* zfp; /* compressed stream */ | |
void* buffer; /* storage for compressed stream */ | |
size_t bufsize; /* byte size of compressed buffer */ | |
bitstream* stream; /* bit stream to write to or read from */ | |
size_t zfpsize; /* byte size of compressed stream */ | |
/* allocate meta data for the 3D array a[nz][ny][nx] */ | |
type = zfp_type_float; | |
field = zfp_field_3d(array, type, nx, ny, nz); | |
/* allocate meta data for a compressed stream */ | |
zfp = zfp_stream_open(NULL); | |
/* set compression mode and parameters via one of four functions */ | |
zfp_stream_set_rate(zfp, bpv, type, zfp_field_dimensionality(field), zfp_false); | |
//zfp_stream_set_accuracy(zfp, 1e-3); | |
zfp_stream_set_execution(zfp, zfp_exec_omp); | |
/* allocate buffer for compressed data */ | |
bufsize = zfp_stream_maximum_size(zfp, field); | |
buffer = malloc(bufsize); | |
/* associate bit stream with allocated buffer */ | |
stream = stream_open(buffer, bufsize); | |
zfp_stream_set_bit_stream(zfp, stream); | |
zfp_stream_rewind(zfp); | |
/* compress or decompress entire array */ | |
if (decompress) { | |
zfp_stream_set_execution(zfp, zfp_exec_serial); | |
zfp_field_set_pointer(field, (void *)array); | |
/* read compressed stream and decompress and output array */ | |
FILE *fp = fopen("compressed_file", "rb+"); | |
zfpsize = fread(buffer, 1, *cmp_size, fp); | |
fclose(fp); | |
zfpsize = zfp_decompress(zfp, field); | |
if (!zfpsize) { | |
fprintf(stderr, "decompression failed\n"); | |
status = EXIT_FAILURE; | |
} | |
} | |
else { | |
/* compress array and output compressed stream */ | |
zfpsize = zfp_compress(zfp, field); | |
if (!zfpsize) { | |
fprintf(stderr, "compression failed\n"); | |
status = EXIT_FAILURE; | |
} | |
else { | |
FILE *fp = fopen("compressed_file", "wb"); | |
fwrite(buffer, 1, zfpsize, fp); | |
fclose(fp); | |
*cmp_size = zfpsize; | |
} | |
} | |
/* clean up */ | |
zfp_field_free(field); | |
zfp_stream_close(zfp); | |
stream_close(stream); | |
free(buffer); | |
return status; | |
} | |
/* Comprares values of decompressed array with raw array */ | |
double | |
calculate_rmse(float *c_array, float *d_array, int nx, int ny, int nz) | |
{ | |
long int nt = nx * ny * nz; /* total num values */ | |
double rmse, error2 = 0.; | |
//long int i, j, k; /* array index vars */ | |
// #pragma omp parallel for reduction(+:error2) | |
for (int k = 0; k < nz; k++) | |
for (int j = 0; j < ny; j++) | |
for (int i = 0; i < nx; i++) { | |
error2 += (d_array[i + nx * (j + ny * k)] - c_array[i + nx * (j + ny * k)]) * | |
(d_array[i + nx * (j + ny * k)] - c_array[i + nx * (j + ny * k)]); | |
} | |
printf("\n \n"); | |
rmse = sqrt(error2/nt); | |
return rmse; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
int status = 0; | |
int decompress = 0; | |
double rmse = 0.; | |
size_t cmp_size = 0; | |
int bpv = 8; // bits per value 8, compression rate 4 | |
/* allocate 100x100x100 array of floats */ | |
int nx = 768; | |
int ny = 768; | |
int nz = 768; | |
float* c_array = malloc(nx * ny * nz * sizeof(float)); | |
float* d_array = malloc(nx * ny * nz * sizeof(float)); | |
fillGridWithVal(c_array, nx, ny, nz, 0.); | |
fillGridWithVal(d_array, nx, ny, nz, 0.); | |
/* initialize array to be compressed */ | |
int i, j, k; | |
for (k = 0; k < nz; k++) | |
for (j = 0; j < ny; j++) | |
for (i = 0; i < nx; i++) { | |
float x = 2.0 * i / nx; | |
float y = 2.0 * j / ny; | |
float z = 2.0 * k / nz; | |
//c_array[i + nx * (j + ny * k)] = exp(-(x * x + y * y + z * z)); | |
c_array[i + nx * (j + ny * k)] = input_func_cos(i, j, k, nx, ny, nz); | |
} | |
/* compress */ | |
status = compress(c_array, nx, ny, nz, 8, 0, &cmp_size); | |
/* decomrpess */ | |
if (!status) { | |
decompress = 1; | |
status = compress(d_array, nx, ny, nz, 8, decompress, &cmp_size); | |
writeStructuredPoints(nx, ny, nz, d_array, 1, "uncomp_cos_cuda"); | |
rmse = calculate_rmse(c_array, d_array, nx, ny, nz); | |
printf("RMSE = %e\n", rmse); | |
} | |
else { | |
printf("! Error in Compression \n"); | |
} | |
free(c_array); | |
free(d_array); | |
return status; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment