Skip to content

Instantly share code, notes, and snippets.

@veggiesaurus
Last active August 2, 2018 14:10
Show Gist options
  • Save veggiesaurus/7994856a933fb075eede06ef68a5ac1f to your computer and use it in GitHub Desktop.
Save veggiesaurus/7994856a933fb075eede06ef68a5ac1f to your computer and use it in GitHub Desktop.
hdf5_rotation
cmake_minimum_required(VERSION 3.0)
project(hdf5_rotate)
set(CMAKE_CXX_STANDARD 11)
FIND_PACKAGE(HDF5)
INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR})
find_package(OpenMP)
if (OPENMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()
set(LINK_LIBS ${LINK_LIBS} hdf5_serial hdf5_cpp hdf5_hl_cpp)
add_executable(hdf5_rotate main.cpp)
target_link_libraries(hdf5_rotate ${LINK_LIBS})
#include <iostream>
#include <H5Cpp.h>
#include <H5File.h>
#include <vector>
#include <chrono>
#include <omp.h>
#include <limits>
#include <cmath>
#include <bits/unique_ptr.h>
using namespace H5;
using namespace std;
int main(int argc, char** argv) {
if (argc != 3) {
cout << "Usage: hdf5_rotate {inputFile} {outputFile}" << endl;
return 1;
}
string inputFileName = argv[1];
string outputFileName = argv[2];
FloatType floatDataType(PredType::NATIVE_FLOAT);
floatDataType.setOrder(H5T_ORDER_LE);
IntType int64Type(PredType::NATIVE_INT64);
int64Type.setOrder(H5T_ORDER_LE);
auto tStart = chrono::high_resolution_clock::now();
H5File inputFile(inputFileName, H5F_ACC_RDONLY);
auto group = inputFile.openGroup("0");
auto dataSet = group.openDataSet("DATA");
vector<hsize_t> dims(dataSet.getSpace().getSimpleExtentNdims(), 0);
dataSet.getSpace().getSimpleExtentDims(dims.data(), NULL);
uint32_t N = dims.size();
if (N != 4) {
cout << "Currently only supports 4D cubes" << endl;
return 1;
}
auto stokes = dims[0];
auto depth = dims[1];
auto height = dims[2];
auto width = dims[3];
vector<hsize_t> outputDims = {stokes, width, height, depth};
DataSpace dataSpace(4, outputDims.data());
H5File outputFile(outputFileName, H5F_ACC_TRUNC);
auto outputGroup = outputFile.createGroup("0");
auto swizzledGroup = outputGroup.createGroup("SwizzledData");
auto outputDataSet = swizzledGroup.createDataSet("ZYXW", floatDataType, dataSpace);
auto statsGroup = outputGroup.createGroup("Statistics");
auto statsXYGroup = statsGroup.createGroup("XY");
vector<hsize_t> xyStatsDims = {stokes, depth};
DataSpace xyStatsDataSpace(2, xyStatsDims.data());
auto xyMinDataSet = statsXYGroup.createDataSet("MIN", floatDataType, xyStatsDataSpace);
auto xyMaxDataSet = statsXYGroup.createDataSet("MAX", floatDataType, xyStatsDataSpace);
auto xyMeanDataSet = statsXYGroup.createDataSet("MEAN", floatDataType, xyStatsDataSpace);
auto xyNanCountDataSet = statsXYGroup.createDataSet("NAN_COUNT", int64Type, xyStatsDataSpace);
auto statsZGroup = statsGroup.createGroup("Z");
vector<hsize_t> zStatsDims = {stokes, height, width};
DataSpace zStatsDataSpace(3, zStatsDims.data());
auto zMinDataSet = statsZGroup.createDataSet("MIN", floatDataType, zStatsDataSpace);
auto zMaxDataSet = statsZGroup.createDataSet("MAX", floatDataType, zStatsDataSpace);
auto zMeanDataSet = statsZGroup.createDataSet("MEAN", floatDataType, zStatsDataSpace);
auto zNanCountDataSet = statsZGroup.createDataSet("NAN_COUNT", int64Type, zStatsDataSpace);
auto cubeSize = depth * height * width;
cout << "Allocating " << cubeSize * 4 * 2 * 1e-9 << " GB of memory..." << flush;
auto tStartAlloc = chrono::high_resolution_clock::now();
float* stokesCube = new float[cubeSize];
float* rotatedCube = new float[cubeSize];
vector<float> minValsXY(depth * stokes);
vector<float> maxValsXY(depth * stokes);
vector<float> meanValsXY(depth * stokes);
vector<int64_t> nanValsXY(depth * stokes);
vector<float> minValsZ(width * height * stokes, numeric_limits<float>::max());
vector<float> maxValsZ(width * height * stokes, -numeric_limits<float>::max());
vector<float> meanValsZ(width * height * stokes, 0);
vector<int64_t> nanValsZ(width * height * stokes, 0);
auto tEndAlloc = chrono::high_resolution_clock::now();
auto dtAlloc = chrono::duration_cast<chrono::milliseconds>(tEndAlloc - tStartAlloc).count();
cout << "Done in " << dtAlloc * 1e-3 << " seconds" << endl;
for (unsigned int currentStokes = 0; currentStokes < stokes; currentStokes++) {
// Read data into memory space
hsize_t memDims[] = {depth, height, width};
DataSpace memspace(3, memDims);
auto sliceDataSpace = dataSet.getSpace();
vector<hsize_t> count = {1, depth, height, width};
vector<hsize_t> start = {currentStokes, 0, 0, 0};
sliceDataSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
cout << "Reading Stokes " << currentStokes << " dataset..." << flush;
auto tStartRead = chrono::high_resolution_clock::now();
dataSet.read(stokesCube, PredType::NATIVE_FLOAT, memspace, sliceDataSpace);
auto tEndRead = chrono::high_resolution_clock::now();
auto dtRead = chrono::duration_cast<chrono::milliseconds>(tEndRead - tStartRead).count();
float readSpeed = (cubeSize * 4) * 1.0e-6 / (dtRead * 1.0e-3);
cout << "Done in " << dtRead * 1e-3 << " seconds (" << readSpeed << " MB/s)" << endl;
cout << "Processing Stokes " << currentStokes << " dataset..." << flush;
auto tStartProcess = chrono::high_resolution_clock::now();
// First loop calculates stats for each XY slice and rotates the dataset
#pragma omp parallel for
for (auto i = 0; i < depth; i++) {
float minVal = numeric_limits<float>::max();
float maxVal = -numeric_limits<float>::max();
float sum = 0;
int64_t nanCount = 0;
for (auto j = 0; j < height; j++) {
for (auto k = 0; k < width; k++) {
auto sourceIndex = k + width * j + (height * width) * i;
auto destIndex = i + depth * j + (height * depth) * k;
auto val = stokesCube[sourceIndex];
rotatedCube[destIndex] = val;
// Stats
if (!isnan(val)) {
minVal = min(minVal, val);
maxVal = max(maxVal, val);
sum += val;
} else {
nanCount += 1;
}
}
}
if (nanCount != (height * width)) {
minValsXY[currentStokes * depth + i] = minVal;
maxValsXY[currentStokes * depth + i] = maxVal;
nanValsXY[currentStokes * depth + i] = nanCount;
meanValsXY[currentStokes * depth + i] = sum / (height * width - nanCount);
} else {
minValsXY[currentStokes * depth + i] = NAN;
maxValsXY[currentStokes * depth + i] = NAN;
nanValsXY[currentStokes * depth + i] = nanCount;
meanValsXY[currentStokes * depth + i] = NAN;
}
}
// Second loop calculates stats for each Z profile
#pragma omp parallel for
for (auto j = 0; j < height; j++) {
for (auto k = 0; k < width; k++) {
float minVal = numeric_limits<float>::max();
float maxVal = -numeric_limits<float>::max();
float sum = 0;
int64_t nanCount = 0;
for (auto i = 0; i < depth; i++) {
auto sourceIndex = k + width * j + (height * width) * i;
auto val = stokesCube[sourceIndex];
if (!isnan(val)) {
minVal = min(minVal, val);
maxVal = max(maxVal, val);
sum += val;
} else {
nanCount += 1;
}
}
if (nanCount != (height * width)) {
minValsZ[currentStokes * width * height + k + j * width] = minVal;
maxValsZ[currentStokes * width * height + k + j * width] = maxVal;
nanValsZ[currentStokes * width * height + k + j * width] = nanCount;
meanValsZ[currentStokes * width * height + k + j * width] = sum / (depth - nanCount);
} else {
minValsZ[currentStokes * width * height + k + j * width] = NAN;
maxValsZ[currentStokes * width * height + k + j * width] = NAN;
nanValsZ[currentStokes * width * height + k + j * width] = nanCount;
meanValsZ[currentStokes * width * height + k + j * width] = NAN;
}
}
}
auto tEndRotate = chrono::high_resolution_clock::now();
auto dtRotate = chrono::duration_cast<chrono::milliseconds>(tEndRotate - tStartProcess).count();
float rotateSpeed = (cubeSize * 4) * 1.0e-6 / (dtRotate * 1.0e-3);
cout << "Done in " << dtRotate * 1e-3 << " seconds (" << rotateSpeed << " MB/s)" << endl;
cout << "Writing Stokes " << currentStokes << " dataset..." << flush;
auto tStartWrite = chrono::high_resolution_clock::now();
outputDataSet.write(rotatedCube, PredType::NATIVE_FLOAT, memspace, sliceDataSpace);
auto tEndWrite = chrono::high_resolution_clock::now();
auto dtWrite = chrono::duration_cast<chrono::milliseconds>(tEndWrite - tStartWrite).count();
float writeSpeed = (cubeSize * 4) * 1.0e-6 / (dtWrite * 1.0e-3);
cout << "Done in " << dtWrite * 1e-3 << " seconds (" << writeSpeed << " MB/s)" << endl;
}
xyMinDataSet.write(minValsXY.data(), PredType::NATIVE_FLOAT);
xyMaxDataSet.write(maxValsXY.data(), PredType::NATIVE_FLOAT);
xyMeanDataSet.write(meanValsXY.data(), PredType::NATIVE_FLOAT);
xyNanCountDataSet.write(nanValsXY.data(), PredType::NATIVE_INT64);
zMinDataSet.write(minValsZ.data(), PredType::NATIVE_FLOAT);
zMaxDataSet.write(maxValsZ.data(), PredType::NATIVE_FLOAT);
zMeanDataSet.write(meanValsZ.data(), PredType::NATIVE_FLOAT);
zNanCountDataSet.write(nanValsZ.data(), PredType::NATIVE_INT64);
auto tEnd = chrono::high_resolution_clock::now();
auto dtTotal = chrono::duration_cast<chrono::milliseconds>(tEnd - tStart).count();
cout << "Stokes cube rotated in " << dtTotal * 1e-3 << " seconds" << endl;
delete[] stokesCube;
delete[] rotatedCube;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment