Skip to content

Instantly share code, notes, and snippets.

@tomsmeding
Last active November 30, 2020 20:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomsmeding/1631090df10ab5ac98403304ec2b8ac9 to your computer and use it in GitHub Desktop.
Save tomsmeding/1631090df10ab5ac98403304ec2b8ac9 to your computer and use it in GitHub Desktop.
Crappy, but reasonably fast, scatter plots
To build everything, use 'make'. To build only datagen or only cpu, use 'make
datagen' or 'make cpu'.
For the GPU code, you might need to change the sm_61 in 'Makefile' to match
your NVIDIA GPU version.
To generate data, use './datagen 2000000 >data.txt' or similar. Format: one
line the number N, then a line with N random [0, 1] values, then another line
with N random [0, 1] values. First line is intended to be the x coordinates,
second line is the y coordinates.
#include <cstdio>
#include <cassert>
#include "bmp.h"
int64_t bmp_rgb_encoded_size(int width, int height) {
return 54 + (3 * (int64_t)width + 3) / 4 * 4 * (int64_t)height;
}
static void write_le32(uint8_t *dst, uint32_t value) {
dst[0] = value & 0xff;
dst[1] = (value >> 8) & 0xff;
dst[2] = (value >> 16) & 0xff;
dst[3] = value >> 24;
}
static void write_le16(uint8_t *dst, uint16_t value) {
dst[0] = value & 0xff;
dst[1] = value >> 8;
}
void bmp_rgb_encode_memory(uint8_t *dst, const uint8_t *src, int width, int height) {
const int64_t size = bmp_rgb_encoded_size(width, height);
const int padding = (4 - 3 * width % 4) % 4;
dst[0] = 'B';
dst[1] = 'M';
write_le32(&dst[2], size);
dst[6] = dst[7] = dst[8] = dst[9] = 0;
write_le32(&dst[10], 54);
write_le32(&dst[14], 40); // this is a BITMAPINFOHEADER
write_le32(&dst[18], width);
write_le32(&dst[22], height);
write_le16(&dst[26], 1);
write_le16(&dst[28], 24);
write_le32(&dst[30], 0);
write_le32(&dst[34], size - 54);
write_le32(&dst[38], 0); // pixels/metre; imagemagick sets this to 0
write_le32(&dst[42], 0); // and if they can, we can
write_le32(&dst[46], 0);
write_le32(&dst[50], 0);
int off = 54;
for (int y = height - 1; y >= 0; y--) {
for (int x = 0; x < width; x++) {
dst[off+0] = src[3 * (width * y + x) + 2];
dst[off+1] = src[3 * (width * y + x) + 1];
dst[off+2] = src[3 * (width * y + x) + 0];
off += 3;
}
for (int i = 0; i < padding; i++) dst[off++] = 0;
}
assert(off == size);
}
uint8_t* bmp_rgb_encode_memory_alloc(const uint8_t *src, int width, int height) {
const int64_t size = bmp_rgb_encoded_size(width, height);
uint8_t *dst = new uint8_t[size];
bmp_rgb_encode_memory(dst, src, width, height);
return dst;
}
void bmp_rgb_encode_file(const char *fname, const uint8_t *src, int width, int height) {
const int64_t size = bmp_rgb_encoded_size(width, height);
uint8_t *data = bmp_rgb_encode_memory_alloc(src, width, height);
FILE *f = fopen(fname, "w");
fwrite(data, 1, size, f);
fclose(f);
delete[] data;
}
#pragma once
#include <cstdint>
int64_t bmp_encoded_size(int width, int height);
// src should be 3*w*h bytes
// dst should be bmp_rgb_encoded_size(w,h) bytes
void bmp_rgb_encode_memory(uint8_t *dst, const uint8_t *src, int width, int height);
// Returned array is allocated using new[]
uint8_t* bmp_rgb_encode_memory_alloc(const uint8_t *src, int width, int height);
void bmp_rgb_encode_file(const char *fname, const uint8_t *src, int width, int height);
#include <iostream>
#include <vector>
#include <cstdint>
#include <cstring>
#include <ctime>
#include "bmp.h"
#include "util.h"
// Assumes in-bounds
static void draw_pixel(uint8_t *image, size_t width, int x, int y, uint32_t clr) {
image[3 * (width * y + x) + 0] = clr >> 16;
image[3 * (width * y + x) + 1] = clr >> 8;
image[3 * (width * y + x) + 2] = clr;
}
static void draw_dot(
uint8_t *image, size_t width, size_t height, int cx, int cy, int r,
uint32_t clr
) {
for (int dy = -r; dy <= r; dy++)
for (int dx = -r; dx <= r; dx++) {
if (cx + dx < 0 || cx + dx >= (int64_t)width) continue;
if (cy + dy < 0 || cy + dy >= (int64_t)height) continue;
if (dx * dx + dy * dy > r * r) continue;
draw_pixel(image, width, cx + dx, cy + dy, clr);
}
}
int main(int argc, char **argv) {
if (argc != 4) {
std::cerr << "Usage: " << argv[0] << " <width> <height> <out.bmp>" << std::endl;
return 1;
}
const size_t width = parse_int64(argv[1]);
const size_t height = parse_int64(argv[2]);
const char *destfile = argv[3];
const double minx = 0.0, maxx = 1.0;
const double miny = 0.0, maxy = 1.0;
const int dot_radius = 4;
const double dot_radius_domain_x = dot_radius * (maxx - minx) / width;
const double dot_radius_domain_y = dot_radius * (maxy - miny) / height;
clock_t t_start = clock();
size_t N;
std::cin >> N;
std::vector<double> X(N), Y(N);
for (double &v : X) std::cin >> v;
for (double &v : Y) std::cin >> v;
std::cerr << "[cpu read] Time taken: " << (double)(clock() - t_start) / CLOCKS_PER_SEC << std::endl;
t_start = clock();
uint8_t *image = new uint8_t[3 * width * height];
memset(image, 255, 3 * width * height);
for (size_t i = 0; i < N; i++) {
if (X[i] < minx - dot_radius_domain_x || X[i] > maxx + dot_radius_domain_x) continue;
if (Y[i] < miny - dot_radius_domain_y || Y[i] > maxy + dot_radius_domain_y) continue;
const int ix = (X[i] - minx) / (maxx - minx) * (width - 1);
const int iy = (Y[i] - miny) / (maxy - miny) * (height - 1);
draw_dot(image, width, height, ix, iy, dot_radius, 0x2222FF);
}
std::cerr << "[cpu] Time taken: " << (double)(clock() - t_start) / CLOCKS_PER_SEC << std::endl;
bmp_rgb_encode_file(destfile, image, width, height);
delete[] image;
}
#include <iostream>
#include <random>
#include <cstdlib>
#include <ctime>
#include "util.h"
int main(int argc, char **argv) {
if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " <N>" << std::endl;
std::cerr << "Outputs three lines; the first line contains N, the next two both N random numbers in [0,1]." << std::endl;
return 1;
}
const size_t N = parse_int64(argv[1]);
clock_t t_start = clock();
auto seed = std::random_device{}();
std::default_random_engine gen{seed};
std::uniform_real_distribution<double> distr{0, 1};
std::cout << N << std::endl;
for (size_t i = 0; i < N; i++) {
if (i != 0) std::cout.put(' ');
std::cout << distr(gen);
}
std::cout << '\n';
for (size_t i = 0; i < N; i++) {
if (i != 0) std::cout.put(' ');
std::cout << distr(gen);
}
std::cout << std::endl;
clock_t t_end = clock();
std::cerr << "[datagen] Time taken: " << (double)(t_end - t_start) / CLOCKS_PER_SEC << std::endl;
}
#include <iostream>
#include <vector>
#include <cstdint>
#include <cstring>
#include <ctime>
#include "bmp.h"
#include "util.h"
#define CUSUC(expr__) \
do { \
cudaError_t e__ = (expr__); \
if (e__ != cudaSuccess) { \
fprintf(stderr, "On %s:%d %s\n", __FILE__, __LINE__, #expr__); \
fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(e__)); \
abort(); \
} \
} while(0)
// Assumes in-bounds
__device__
static void draw_pixel(uint8_t *image, size_t width, int x, int y, uint32_t clr) {
image[3 * (width * y + x) + 0] = clr >> 16;
image[3 * (width * y + x) + 1] = clr >> 8;
image[3 * (width * y + x) + 2] = clr;
}
__device__
static void draw_dot(
uint8_t *image, size_t width, size_t height, int cx, int cy, int r,
uint32_t clr
) {
for (int dy = -r; dy <= r; dy++)
for (int dx = -r; dx <= r; dx++) {
if (cx + dx < 0 || cx + dx >= (int64_t)width) continue;
if (cy + dy < 0 || cy + dy >= (int64_t)height) continue;
if (dx * dx + dy * dy > r * r) continue;
draw_pixel(image, width, cx + dx, cy + dy, clr);
}
}
struct Params {
double minx, maxx;
double miny, maxy;
int dot_radius;
double dot_radius_domain_x;
double dot_radius_domain_y;
};
__global__
static void k_draw_points(
int N, int width, int height, uint8_t *image, double *X, double *Y, Params params
) {
const int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= N) return;
const double xval = X[index], yval = Y[index];
const double minx = params.minx, maxx = params.maxx;
const double miny = params.miny, maxy = params.maxy;
if (xval < minx - params.dot_radius_domain_x
|| xval > maxx + params.dot_radius_domain_x
|| yval < miny - params.dot_radius_domain_y
|| yval > maxy + params.dot_radius_domain_y) {
return;
}
const int ix = (xval - minx) / (maxx - minx) * (width - 1);
const int iy = (yval - miny) / (maxy - miny) * (height - 1);
draw_dot(image, width, height, ix, iy, params.dot_radius, 0x2222FF);
}
int main(int argc, char **argv) {
if (argc != 4) {
std::cerr << "Usage: " << argv[0] << " <width> <height> <out.bmp>" << std::endl;
return 1;
}
const size_t width = parse_int64(argv[1]);
const size_t height = parse_int64(argv[2]);
const char *destfile = argv[3];
const double minx = 0.0, maxx = 1.0;
const double miny = 0.0, maxy = 1.0;
const int dot_radius = 4;
const double dot_radius_domain_x = dot_radius * (maxx - minx) / width;
const double dot_radius_domain_y = dot_radius * (maxy - miny) / height;
Params params;
params.minx = minx;
params.maxx = maxx;
params.miny = miny;
params.maxy = maxy;
params.dot_radius = dot_radius;
params.dot_radius_domain_x = dot_radius_domain_x;
params.dot_radius_domain_y = dot_radius_domain_y;
size_t N;
std::cin >> N;
clock_t t_start = clock();
std::vector<uint8_t> image(3 * width * height);
std::vector<double> X(N), Y(N);
uint8_t *imagedev = nullptr;
double *Xdev = nullptr, *Ydev = nullptr;
cudaMalloc(&imagedev, 3 * width * height);
cudaMalloc(&Xdev, N * sizeof(double));
cudaMalloc(&Ydev, N * sizeof(double));
if (!imagedev || !Xdev || !Ydev) {
std::cerr << "Failed to cudaMalloc" << std::endl;
return 1;
}
std::cerr << "[gpu malloc] Time taken: " << (double)(clock() - t_start) / CLOCKS_PER_SEC << std::endl;
t_start = clock();
for (size_t i = 0; i < N; i++) std::cin >> X[i];
for (size_t i = 0; i < N; i++) std::cin >> Y[i];
std::cerr << "[gpu read] Time taken: " << (double)(clock() - t_start) / CLOCKS_PER_SEC << std::endl;
t_start = clock();
CUSUC(cudaMemset(imagedev, 255, 3 * width * height));
CUSUC(cudaMemcpy(Xdev, X.data(), N * sizeof(double), cudaMemcpyHostToDevice));
CUSUC(cudaMemcpy(Ydev, Y.data(), N * sizeof(double), cudaMemcpyHostToDevice));
CUSUC(cudaDeviceSynchronize());
k_draw_points<<<(N + 1023) / 1024, 1024>>>(N, width, height, imagedev, Xdev, Ydev, params);
CUSUC(cudaDeviceSynchronize());
CUSUC(cudaMemcpy(image.data(), imagedev, 3 * width * height, cudaMemcpyDeviceToHost));
CUSUC(cudaDeviceSynchronize());
std::cerr << "[gpu] Time taken: " << (double)(clock() - t_start) / CLOCKS_PER_SEC << std::endl;
bmp_rgb_encode_file(destfile, image.data(), width, height);
t_start = clock();
CUSUC(cudaFree(Xdev));
CUSUC(cudaFree(Ydev));
CUSUC(cudaFree(imagedev));
std::cerr << "[gpu free] Time taken: " << (double)(clock() - t_start) / CLOCKS_PER_SEC << std::endl;
}
#include <iostream>
#include <cstdlib>
#include "util.h"
int64_t parse_int64(const char *str) {
char *endp = nullptr;
const int64_t val = strtoll(str, &endp, 10);
if (str[0] == '\0' || *endp != '\0') {
std::cerr << "Invalid number: '" << str << "'" << std::endl;
return 1;
}
return val;
}
#pragma once
#include <cstdint>
int64_t parse_int64(const char *str);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment