Skip to content

Instantly share code, notes, and snippets.

@jasorod
Last active August 22, 2019 17:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jasorod/d63c262a876744cee1cbe503c27f6cc3 to your computer and use it in GitHub Desktop.
Save jasorod/d63c262a876744cee1cbe503c27f6cc3 to your computer and use it in GitHub Desktop.
Blue Noise Generator
/*
* Blue noise dithered sampling algorithm based on the paper "Blue-noise Dithered Sampling" by Georgiev and Fajardo
* Uses simulated anealing to calculate a blue noise image from a random set of values
*
* Program will output a PGM file for the first dimension of the random noise image and the blue noise image
*
* compile: nvcc -O3 --std=c++14 -o blue_noise_gen blue_noise_gen.cu
*
*/
#include <cuda_runtime.h>
#include <math.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <random>
#include <limits>
/*
* adjust these values depending on the size of the image and the dimensions of the vector you want, the
* threshold for when the algorithm should stop, and the bit-depth of the output PGM
*/
constexpr int image_width = 256;
constexpr int image_height = 256;
constexpr float variance = 2.1f;
constexpr int kernel_range = 5;
constexpr int iteration_miss_threshold = 5000000;
constexpr unsigned int dimensions = 1;
constexpr int max_output_value = std::numeric_limits<unsigned char>::max();
//leave these values constant
constexpr int cuda_block_size = 32;
constexpr float epsilon = 1e-5;
template<unsigned int dimesion = 1>
struct FloatVectorType {
static constexpr unsigned int dim = dimension;
float data[dimension];
} __attribute__ ((aligned(4)));
bool temp_probability(float temp, float prev_state, float current_state, float probability) {
return std::exp(-1 * (current_state - prev_state) / temp) > probability && std::fabs(prev_state - current_state) > epsilon;
}
template<typename T>
__inline__ __device__ T warp_reduce_sum(T val) {
for (int offset = warpSize/2; offset > 0; offset /= 2) {
val += __shfl_down_sync(0xffffffff, val, offset);
}
return val;
}
template<typename T>
__global__ void reduce_sum(T *input_array, T* out_value, int N) {
T sum = T(0);
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < N) {
sum = warp_reduce_sum(input_array[index]);
}
if ((threadIdx.x & (warpSize - 1)) == 0) {
atomicAdd(out_value, sum);
}
}
template<typename VectorType>
__device__ float norm_diff(VectorType const& a, VectorType const& b) {
float diff[VectorType::dim];
for (int i = 0; i < VectorType::dim; i++) {
diff[i] = b.data[i] - a.data[i];
}
return normf(VectorType::dim, diff);
}
template<typename VectorType>
__global__ void energy_function(VectorType const *input_array, float *energy_array, int width, int height, int kernel_range, float variance) {
const int col = threadIdx.x + blockIdx.x * blockDim.x;
const int row = threadIdx.y + blockIdx.y * blockDim.y;
if (col < width && row < height) {
VectorType input_source_sample = input_array[col + row * width];
energy_array[col + row * width] = 0.0f;
for (int i = 1 - kernel_range; i < kernel_range; i++) {
for (int j = 1 - kernel_range; j < kernel_range; j++) {
int input_index_x = (width + ((col + j) % width)) % width;
int input_index_y = (height + ((row + i) % height)) % height;
VectorType input_kernel_sample = input_array[input_index_x + input_index_y * width];
float pixel_gaussian = (powf(static_cast<float>(col + j) - static_cast<float>(col), 2.0f) + powf(static_cast<float>(row + i) - static_cast<float>(row), 2.0f)) / powf(variance, 2.0f);
float vector_gaussian = powf(norm_diff(input_source_sample, input_kernel_sample), static_cast<float>(VectorType::dim) / 2.0f);
energy_array[col + row * width] += expf(-1 * (pixel_gaussian + vector_gaussian));
}
}
}
}
template<typename T>
void swap_pixels(T *image_array, int2 location_a, int2 location_b) {
T temp = image_array[location_a.x + location_a.y * image_width];
image_array[location_a.x + location_a.y * image_width] = image_array[location_b.x + location_b.y * image_width];
image_array[location_b.x + location_b.y * image_width] = temp;
}
template<typename VectorType>
void output_pgm(std::string filename, int channel, VectorType const *data_array) {
std::ofstream pgm(filename.c_str());
pgm << "P2\n" << image_width << " " << image_height << "\n" << max_output_value << "\n";
for (int i = 0; i < image_height; i++) {
for (int j = 0; j < image_width; j++) {
pgm << static_cast<unsigned int>(data_array[j + i * image_width].data[channel] * max_output_value) << "\n";
}
}
pgm.close();
}
int main(int argc, char** argv) {
std::random_device rd;
std::mt19937 generator(rd());
std::uniform_int_distribution<> rand_int_width_dist(0, image_width - 1);
std::uniform_int_distribution<> rand_int_height_dist(0, image_height - 1);
std::uniform_real_distribution<> rand_float_dist(0.0, 1.0);
FloatVectorType<dimensions> *source_array;
FloatVectorType<dimensions> *solution_array;
cudaMallocManaged(&source_array, sizeof(FloatVectorType<dimensions>) * image_width * image_height);
solution_array = new FloatVectorType<dimensions>[image_width * image_height];
for (int i = 0; i < image_height; i++){
for (int j = 0; j < image_width; j++) {
source_array[j + i * image_width] = FloatVectorType<dimensions>{static_cast<float>(rand_float_dist(generator))};
}
}
std::cerr << "Outputting initial random starting point" << std::endl;
output_pgm("random_noise.pgm", 0, source_array);
float current_energy = std::numeric_limits<float>::max();
float *energy_array = nullptr;
float *new_energy_value = nullptr;
cudaMallocManaged(&energy_array, sizeof(float) * image_width * image_height);
cudaMallocManaged(&new_energy_value, sizeof(float));
float temperature = 1.0f;
int2 swap_a_loc;
int2 swap_b_loc;
int iteration = 0;
int iteration_miss = 0;
dim3 grid_size_2d((image_width + cuda_block_size) / cuda_block_size, (image_height + cuda_block_size) / cuda_block_size);
dim3 block_size_2d(cuda_block_size, cuda_block_size);
dim3 grid_size_1d((image_width * image_height + cuda_block_size) / cuda_block_size);
dim3 block_size_1d(cuda_block_size);
//iterate until we've gotten to the point where we're no longer getting results
while (iteration_miss < iteration_miss_threshold) {
//generate random swap indices
swap_a_loc.x = rand_int_width_dist(generator);
swap_a_loc.y = rand_int_height_dist(generator);
swap_b_loc.x = rand_int_width_dist(generator);
swap_b_loc.y = rand_int_height_dist(generator);
//swap two random pixel pairs in the source array
swap_pixels(source_array, swap_a_loc, swap_b_loc);
*new_energy_value = 0.0f;
energy_function<<<grid_size_2d, block_size_2d>>>(source_array, energy_array, image_width, image_height, kernel_range, variance);
reduce_sum<<<grid_size_1d, block_size_1d>>>(energy_array, new_energy_value, image_width * image_height);
cudaDeviceSynchronize();
//if the new energy less than the current energy or temp heuristic is true, assign the current source array as the new solution array, otherwise undo the pixel swap and start again
if (*new_energy_value < current_energy || temp_probability(temperature, current_energy, *new_energy_value, rand_float_dist(generator))) {
std::cerr << "[Run #" << iteration << "] found new energy result: " << *new_energy_value << std::endl;
current_energy = *new_energy_value;
cudaMemcpy(solution_array, source_array, sizeof(FloatVectorType<dimensions>) * image_width * image_height, cudaMemcpyDefault);
iteration_miss = 0;
} else {
swap_pixels(source_array, swap_a_loc, swap_b_loc);
iteration_miss++;
}
temperature *= 0.995f;
iteration++;
}
std::cerr << "[Run #" << iteration << "] finished simulation, outputting PGM file" << std::endl;
output_pgm("blue_noise.pgm", 0, solution_array);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment