Skip to content

Instantly share code, notes, and snippets.

@silverweed
Last active April 20, 2019 19:47
Show Gist options
  • Save silverweed/6508f8ce26e4cbcc690f91d5f48010df to your computer and use it in GitHub Desktop.
Save silverweed/6508f8ce26e4cbcc690f91d5f48010df to your computer and use it in GitHub Desktop.
Cuda + SFML test
# Everything is compiled and linked through nvcc.
CC = nvcc
# C++11, with -Wall/-Wextra/-ggdb forwarded to the host compiler via --compiler-options.
CFLAGS = -std=c++11 --compiler-options -Wall --compiler-options -Wextra --compiler-options -ggdb
# Libraries passed at link time: three SFML modules and curand.
LDFLAGS = -lsfml-graphics -lsfml-window -lsfml-system -lcurand
# Default goal: build test2 only.
all: test2
# Pattern rule: link an executable from the object file of the same name.
# NOTE(review): recipe lines must start with a TAB in a real Makefile; the
# leading whitespace appears to have been lost in this copy - confirm.
%: %.o
$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
# Pattern rule: compile a .cu source to an object file; myutils.hpp is listed
# as a prerequisite, so objects are rebuilt when that header changes.
%.o: %.cu myutils.hpp
$(CC) $(CFLAGS) $< -c
#pragma once
#include <cstdlib>
#include <iostream>
// MUST(x): evaluate the CUDA runtime expression `x` exactly once. If the
// result is not cudaSuccess, print the source line and the CUDA error string
// to stderr and terminate the process with exit status 1.
//
// The status is stored in a local so that the failing call is not executed a
// second time when the error message is built. The expansion is a
// self-contained brace block (not do/while) so that call sites written
// without a trailing semicolon keep compiling.
#define MUST(x) \
{ \
    const auto must_status_ = (x); \
    if (must_status_ != cudaSuccess) { \
        std::cerr << "CUDA error at line " << __LINE__ << ": " \
                  << cudaGetErrorString(must_status_) << std::endl; \
        std::exit(1); \
    } \
}
// MUST_CRND(x): evaluate the cuRAND expression `x` exactly once. If the result
// is not CURAND_STATUS_SUCCESS, print the source line to stderr and terminate
// the process with exit status 1.
//
// The argument is parenthesized and captured in a local so that compound
// expressions compare as a whole. The expansion is a self-contained brace
// block so that call sites written without a trailing semicolon keep
// compiling.
#define MUST_CRND(x) \
{ \
    const auto must_crnd_status_ = (x); \
    if (must_crnd_status_ != CURAND_STATUS_SUCCESS) { \
        std::cerr << "CURAND error at line " << __LINE__ << std::endl; \
        std::exit(1); \
    } \
}
#include <SFML/Graphics.hpp>
#include <SFML/Window.hpp>
#include <curand.h>
#include "myutils.hpp"
#define WIDTH 1920
#define HEIGHT 1080
using u8 = unsigned char;
void float_to_u8(float *src, u8 *dst, size_t n);
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize);
// Entry point of the cuRAND + SFML demo.
//
// Every frame: fill a device buffer with uniform random floats, copy it to the
// host, convert it to RGBA bytes and draw it as a window-sized sprite. The
// time spent in generation + copy is measured with CUDA events and logged.
//
// Returns 0 when the window is closed (window close button or the Q key).
// Returns 1 if the texture or the host buffer cannot be allocated; CUDA and
// cuRAND failures terminate the process through MUST / MUST_CRND.
int main() {
    /// Init CURAND
    curandGenerator_t gen;
    MUST_CRND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT))

    /// Init SFML
    sf::RenderWindow window(sf::VideoMode(WIDTH, HEIGHT), "Test CuSFML");
    window.setFramerateLimit(60);

    sf::Texture tex;
    if (!tex.create(WIDTH, HEIGHT)) {
        std::cerr << "Could not create a " << WIDTH << "x" << HEIGHT << " texture" << std::endl;
        return 1;
    }
    sf::Sprite sprite(tex);

    // Number of generated floats: 3 (RGB) per pixel.
    const size_t N = WIDTH * HEIGHT * 3;

    // Array of generated floats on device. This is filled by curand generator.
    float *devData;
    // Array to copy generated floats to on host.
    float *hostData;
    // Array of pixels to update the texture with.
    // Note that we only generate RGB channels, but we still need to pass RGBA
    // data to the SFML texture, so this array has 4 numbers per pixel, not 3.
    // Static storage: at WIDTH * HEIGHT * 4 bytes (about 7.9 MiB for
    // 1920x1080) this is too large to place safely on the stack.
    static u8 pixels[WIDTH * HEIGHT * 4];

    // Allocate host and device memory
    hostData = static_cast<float*>(malloc(N * sizeof(float)));
    if (hostData == nullptr) {
        std::cerr << "Could not allocate " << N * sizeof(float) << " bytes of host memory" << std::endl;
        return 1;
    }
    MUST(cudaMalloc(&devData, N * sizeof(float)))

    cudaEvent_t start, end;
    MUST(cudaEventCreate(&start))
    MUST(cudaEventCreate(&end))

    // Main loop
    while (window.isOpen()) {
        // Event loop
        sf::Event evt;
        while (window.pollEvent(evt)) {
            switch (evt.type) {
            case sf::Event::Closed:
                window.close();
                break;
            case sf::Event::Resized:
                window.setView(keep_ratio(evt.size, sf::Vector2u(WIDTH, HEIGHT)));
                // Must not fall through: evt.key is not the active union
                // member for a resize event.
                break;
            case sf::Event::KeyPressed:
                if (evt.key.code == sf::Keyboard::Q)
                    window.close();
                break;
            default:
                break;
            }
        }

        // The two events bracket generation + device-to-host copy.
        MUST(cudaEventRecord(start))
        // Generate random floats on GPU
        MUST_CRND(curandGenerateUniform(gen, devData, N))
        // and copy them to host
        MUST(cudaMemcpy(hostData, devData, N * sizeof(float), cudaMemcpyDeviceToHost))
        MUST(cudaEventRecord(end))
        MUST(cudaEventSynchronize(end))

        float ms;
        MUST(cudaEventElapsedTime(&ms, start, end))
        std::clog << "CUDA took " << ms << " ms\n";

        // Convert generated floats to u8
        float_to_u8(hostData, pixels, N);
        tex.update(pixels);

        window.clear();
        window.draw(sprite);
        window.display();
    }

    // Release everything acquired above.
    MUST(cudaEventDestroy(end))
    MUST(cudaEventDestroy(start))
    MUST(cudaFree(devData))
    free(hostData);

    // Tear down CURAND
    MUST_CRND(curandDestroyGenerator(gen))
    return 0;
}
// Expand packed RGB floats into RGBA bytes.
//   src: n floats, read in groups of three (R, G, B).
//   dst: receives four bytes per group; each colour byte is the float
//        multiplied by 255 and converted to u8, and the fourth (alpha) byte
//        is always 255.
//   n:   number of floats in src; n / 3 groups are converted, so a trailing
//        remainder of one or two floats is left untouched.
void float_to_u8(float *src, u8 *dst, size_t n) {
    const size_t pixelCount = n / 3;
    const float *in = src;
    u8 *out = dst;
    for (size_t px = 0; px < pixelCount; ++px, in += 3, out += 4) {
        out[0] = static_cast<u8>(in[0] * 255);
        out[1] = static_cast<u8>(in[1] * 255);
        out[2] = static_cast<u8>(in[2] * 255);
        out[3] = 255;
    }
}
// Build the view to install after a window resize.
//   size:         new window size, as reported by the resize event.
//   designedsize: size the scene was designed for.
// The returned view always shows the rectangle (0, 0, designedsize.x,
// designedsize.y). Its viewport is shrunk and centred along the axis on which
// the window was scaled more, so the designed aspect ratio is kept; the
// viewport stays the full window when both axes were scaled equally.
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize) {
    // Scale factor of the window relative to the designed size, per axis.
    const float scaleX = size.width / static_cast<float>(designedsize.x);
    const float scaleY = size.height / static_cast<float>(designedsize.y);

    // Viewport in normalized window coordinates; starts as the full window.
    float left = 0.f, top = 0.f, width = 1.f, height = 1.f;
    if (scaleX > scaleY) {
        // Window is relatively too wide: narrow the viewport and centre it.
        width = scaleY / scaleX;
        left = (1.f - width) / 2.f;
    } else if (scaleX < scaleY) {
        // Window is relatively too tall: shorten the viewport and centre it.
        height = scaleX / scaleY;
        top = (1.f - height) / 2.f;
    }

    sf::View view(sf::FloatRect(0, 0, designedsize.x , designedsize.y));
    view.setViewport(sf::FloatRect(left, top, width, height));
    return view;
}
#include <SFML/Graphics.hpp>
#include <SFML/Window.hpp>
#include "myutils.hpp"
#include <iomanip>
#include <cmath>
#define WIDTH 1920
#define HEIGHT 1080
using std::cerr;
using std::endl;
using u8 = unsigned char;
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize);
// Kernel: write one RGBA pixel per thread.
//
// Launch layout: a 2-D grid of 2-D blocks. The image width and height are
// taken to be gridDim.x * blockDim.x and gridDim.y * blockDim.y, and there is
// no bounds check, so the launch must tile the image exactly.
//
//   data:     output buffer, dereferenced on the device; must hold at least
//             4 * W * H elements, stored row by row with 4 elements per pixel.
//   pulse, t, wavefreq: parameters of psi(r) = |cos(pulse * t - wavefreq * r)|,
//             where r is the pixel's distance from the image centre.
//
// Channels written: R = 0, G = 255 * product of the thread's relative x and y
// position inside its block, B = 255 * psi, A = 255.
template<typename DataType>
__global__ void updatePixels(DataType *data, float pulse, float t, float wavefreq) {
    const auto W = gridDim.x * blockDim.x;
    const auto H = gridDim.y * blockDim.y;
    const auto idx = blockDim.x * blockIdx.x + threadIdx.x;
    const auto idy = blockDim.y * blockIdx.y + threadIdx.y;

    // Calculate psi(r) = |cos(pulse * t - wavefreq * r)|
    // Float literals keep the arithmetic in single precision.
    const float x = idx - W * 0.5f;
    const float y = idy - H * 0.5f;
    const float r = sqrtf(x * x + y * y);
    // fabsf rather than unqualified abs, so the float overload is guaranteed.
    const float psi = fabsf(cosf(pulse * t - wavefreq * r));

    const auto index = 4 * (W * idy + idx);
    data[index + 0] = 0;
    data[index + 1] = 255 * (float(threadIdx.x)/blockDim.x * float(threadIdx.y)/blockDim.y);
    data[index + 2] = 255 * psi;
    data[index + 3] = 255;
}
// Host-side renderer: fill a W x H RGBA image.
//
//   data:     output buffer of at least 4 * W * H elements, stored row by row
//             with 4 elements per pixel.
//   pulse, t, wavefreq: parameters of psi(r) = |cos(pulse * t - wavefreq * r)|,
//             where r is the pixel's distance from the image centre.
//   W, H:     image width and height in pixels; nothing is written when
//             either is <= 0.
//
// Channels written: R = 0, G = 0, B = 255 * psi, A = 255.
template<typename DataType>
void updatePixelsCPU(DataType *data, float pulse, float t, float wavefreq, int W, int H) {
    const float halfW = W * 0.5f;
    const float halfH = H * 0.5f;

    // Rows in the outer loop so that memory is written sequentially.
    for (int idy = 0; idy < H; ++idy) {
        const float y = idy - halfH;
        for (int idx = 0; idx < W; ++idx) {
            // Calculate psi(r) = |cos(pulse * t - wavefreq * r)|
            const float x = idx - halfW;
            const float r = sqrtf(x * x + y * y);
            // fabsf rather than unqualified abs: abs may resolve to the
            // integer overload and truncate psi to 0 or 1.
            const float psi = fabsf(cosf(pulse * t - wavefreq * r));

            // size_t arithmetic so large images cannot overflow an int index.
            const size_t index = 4 * (static_cast<size_t>(W) * idy + idx);
            data[index + 0] = 0;
            data[index + 1] = 0;
            data[index + 2] = 255 * psi;
            data[index + 3] = 255;
        }
    }
}
// Entry point of the kernel + SFML demo.
//
// Every frame an RGBA image is produced either on the GPU (updatePixels, then
// copied to pinned host memory) or on the CPU (updatePixelsCPU), uploaded to a
// texture and drawn as a window-sized sprite.
//
// Keys: Q quits, Add / Subtract raise / lower wavefreq by 20%, V toggles the
// 60 FPS frame limit, C toggles between GPU and CPU rendering.
//
// In GPU mode a statistics line is logged every 100 frames; in CPU mode one is
// logged every frame. CUDA failures terminate the process through MUST.
int main() {
    // updatePixels derives the image size from the launch geometry and has no
    // bounds check, and the grid size below uses truncating division, so the
    // block size must divide the image size exactly.
    constexpr unsigned BLOCK_W = 32;
    constexpr unsigned BLOCK_H = 18;
    static_assert(WIDTH % BLOCK_W == 0 && HEIGHT % BLOCK_H == 0,
                  "block size must evenly divide WIDTH x HEIGHT");

    /// Init SFML
    sf::RenderWindow window(sf::VideoMode(WIDTH, HEIGHT), "Test CudaSFML");
    window.setFramerateLimit(60);
    bool vsync = true;

    sf::Texture tex;
    if (!tex.create(WIDTH, HEIGHT)) {
        cerr << "Could not create a " << WIDTH << "x" << HEIGHT << " texture" << endl;
        return 1;
    }
    sf::Sprite sprite(tex);

    // Number of u8 elements in one RGBA frame.
    const size_t N = WIDTH * HEIGHT * 4;
    // Size of one frame in bytes. Elements are u8, not float.
    const size_t frameBytes = N * sizeof(u8);

    // Array of generated pixels on device.
    u8 *devData;
    // Array to copy generated pixels to on host.
    u8 *hostData;

    // Allocate host and device memory
    MUST(cudaMallocHost(&hostData, frameBytes))
    MUST(cudaMalloc(&devData, frameBytes))

    dim3 blockSize(BLOCK_W, BLOCK_H);
    dim3 gridSize(WIDTH / blockSize.x, HEIGHT / blockSize.y);

    float t = 0; // time
    float pulse = 10;
    float wavefreq = 0.04;

    sf::Clock clock;
    float timeAcc = 0; // seconds accumulated since the last statistics line
    float ms = 0;      // duration of the last kernel + copy, in milliseconds
    bool cpu = false;

    cudaEvent_t start, end;
    MUST(cudaEventCreate(&start))
    MUST(cudaEventCreate(&end))

    int cycles = 0;

    // Main loop
    while (window.isOpen()) {
        // Event loop
        sf::Event evt;
        while (window.pollEvent(evt)) {
            switch (evt.type) {
            case sf::Event::Closed:
                window.close();
                break;
            case sf::Event::Resized:
                window.setView(keep_ratio(evt.size, sf::Vector2u(WIDTH, HEIGHT)));
                // Must not fall through: evt.key is not the active union
                // member for a resize event.
                break;
            case sf::Event::KeyPressed:
                switch (evt.key.code) {
                case sf::Keyboard::Q:
                    window.close();
                    break;
                case sf::Keyboard::Add:
                    wavefreq += wavefreq / 5.0;
                    break;
                case sf::Keyboard::Subtract:
                    wavefreq -= wavefreq / 5.0;
                    break;
                case sf::Keyboard::V:
                    vsync = !vsync;
                    window.setFramerateLimit(vsync ? 60 : 0);
                    break;
                case sf::Keyboard::C:
                    cpu = !cpu;
                    break;
                default:
                    break;
                }
                break;
            default:
                break;
            }
        }

        if (!cpu) {
            MUST(cudaEventRecord(start))
            // Generate pixels on device
            updatePixels<<<gridSize, blockSize>>>(devData, pulse, t, wavefreq);
            // A kernel launch returns no status; pick up launch errors here.
            MUST(cudaGetLastError())
            // and copy them to host
            MUST(cudaMemcpy(hostData, devData, frameBytes, cudaMemcpyDeviceToHost))
            MUST(cudaEventRecord(end))
            MUST(cudaEventSynchronize(end))
            MUST(cudaEventElapsedTime(&ms, start, end))
        } else {
            updatePixelsCPU(hostData, pulse, t, wavefreq, WIDTH, HEIGHT);
        }

        tex.update(hostData);

        window.clear();
        window.draw(sprite);
        window.display();

        const auto delta = clock.restart().asSeconds();
        t += delta;
        timeAcc += delta;

        if (!cpu) {
            // timeAcc covers 100 frames here, so 10 * timeAcc is the mean
            // frame time in milliseconds.
            if (++cycles == 100) {
                std::clog << "[GPU] " << std::setprecision(4) << std::setw(6) << 100.0 / timeAcc << " FPS ("
                          << std::setw(5) << 10 * timeAcc << " ms loop, " << std::setw(5) << ms << " ms CUDA)\n";
                cycles = 0;
                timeAcc = 0;
            }
        } else {
            std::clog << "[CPU] " << std::setprecision(4) << std::setw(6) << 1.0 / timeAcc << " FPS ("
                      << std::setw(5) << 1000 * timeAcc << " ms loop)\n";
            cycles = 0;
            timeAcc = 0;
        }
    }

    MUST(cudaFreeHost(hostData))
    MUST(cudaFree(devData))
    MUST(cudaEventDestroy(end))
    MUST(cudaEventDestroy(start))
    return 0;
}
// Compute the view to use after the window has been resized.
//   size:         new window size, as reported by the resize event.
//   designedsize: size the scene was designed for.
// The view always covers the rectangle (0, 0, designedsize.x, designedsize.y).
// Its viewport is reduced and centred on whichever axis the window was
// stretched more, which keeps the designed aspect ratio; with equal stretch
// on both axes the viewport remains the whole window.
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize) {
    // Per-axis ratio between the current window size and the designed size.
    const float ratioX = size.width / static_cast<float>(designedsize.x);
    const float ratioY = size.height / static_cast<float>(designedsize.y);

    // Normalized viewport rectangle, initially the entire window.
    float vpLeft = 0.f, vpTop = 0.f, vpWidth = 1.f, vpHeight = 1.f;
    if (ratioX > ratioY) {
        // Wider than designed: shrink horizontally and centre.
        vpWidth = ratioY / ratioX;
        vpLeft = (1.f - vpWidth) / 2.f;
    } else if (ratioX < ratioY) {
        // Taller than designed: shrink vertically and centre.
        vpHeight = ratioX / ratioY;
        vpTop = (1.f - vpHeight) / 2.f;
    }

    sf::View view(sf::FloatRect(0, 0, designedsize.x , designedsize.y));
    view.setViewport(sf::FloatRect(vpLeft, vpTop, vpWidth, vpHeight));
    return view;
}
@pradyumnabilagi
Copy link

pradyumnabilagi commented Apr 20, 2019

should we put these files in visual studio 2017 by making a new makefile project?
if yes what should the configuration of the debugger and release be?

I really want to put CUDA and SFML in one program and run it. I have been trying to do that for two days, since integrating CUDA with Visual Studio 2017.
I am using CUDA 10.1
All I really want is to get a GUI for CUDA. Please help.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment