Skip to content

Instantly share code, notes, and snippets.

@notogawa
Created February 10, 2020 12:22
Show Gist options
  • Save notogawa/36d0cc9168ae3236902729f26064281d to your computer and use it in GitHub Desktop.
Save notogawa/36d0cc9168ae3236902729f26064281d to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Argument block for IOCTL_V3D_CREATE_BO.
 * The caller fills in size and flags; handle and offset are read back after
 * the ioctl returns.
 * NOTE(review): field order and widths are presumably required to match the
 * kernel's V3D UAPI struct of the same name -- confirm against v3d_drm.h.
 */
typedef struct {
/* Requested buffer size in bytes (main passes sizeof(do_nothing)). */
uint32_t size;
/* Creation flags; this program always passes 0. */
uint32_t flags;
/* Buffer-object handle, read by the caller after the ioctl. */
uint32_t handle;
/* Read by the caller after the ioctl and used as the shader address for
 * submit_csd; presumably the BO's address as seen by the GPU -- TODO confirm. */
uint32_t offset;
} drm_v3d_create_bo;
/*
 * Argument block for IOCTL_V3D_MMAP_BO.
 * The caller fills in handle and flags; offset is read back after the ioctl
 * and passed as the file offset to mmap() on the DRM file descriptor.
 * NOTE(review): layout presumably mirrors the kernel V3D UAPI -- confirm.
 */
typedef struct {
/* Handle of the buffer object to map. */
uint32_t handle;
/* Flags; this program always passes 0. */
uint32_t flags;
/* Offset to hand to mmap(), read after the ioctl. */
uint64_t offset;
} drm_v3d_mmap_bo;
/*
 * Argument block for IOCTL_GEM_CLOSE, used to release a buffer-object handle.
 * NOTE(review): layout presumably mirrors the kernel's struct drm_gem_close
 * -- confirm against drm.h.
 */
typedef struct {
/* Handle of the buffer object to release. */
uint32_t handle;
/* Presumably padding to keep the struct 8 bytes wide -- TODO confirm. */
uint32_t pad;
} gem_close;
/*
 * Argument block for IOCTL_V3D_WAIT_BO.
 * NOTE(review): layout presumably mirrors the kernel V3D UAPI -- confirm.
 */
typedef struct {
/* Handle of the buffer object to wait on. */
uint32_t handle;
/* Set to 0 by wait_bo(); presumably padding -- TODO confirm. */
uint32_t pad;
/* Timeout in nanoseconds (per the field name); wait_bo() passes 10e9. */
uint64_t timeout_ns;
} drm_v3d_wait_bo;
/*
 * Argument block for IOCTL_V3D_SUBMIT_CSD, describing one compute job.
 * NOTE(review): layout presumably mirrors the kernel V3D UAPI; the meaning of
 * the individual cfg/coef words is defined by the hardware/driver, not by
 * this file -- confirm against v3d_drm.h.
 */
typedef struct {
/* Seven dispatch configuration words; see submit_csd() for how each is set. */
uint32_t cfg[7];
/* Four coefficient words; submit_csd() sets all of them to 0. */
uint32_t coef[4];
/* Address of a uint32_t array of buffer-object handles, stored as an integer. */
uint64_t bo_handles;
/* Number of entries in the bo_handles array. */
uint32_t bo_handle_count;
/* Set to 0 by submit_csd(); presumably an optional sync object to wait on. */
uint32_t in_sync;
/* Set to 0 by submit_csd(); presumably an optional sync object to signal. */
uint32_t out_sync;
} drm_v3d_submit_csd;
/*
 * ioctl numbering used to talk to the DRM device.
 * NOTE(review): these values are presumably copied from the kernel's drm.h /
 * v3d_drm.h UAPI headers -- confirm against the running kernel's headers.
 */

/* ioctl "type" character shared by every request below. */
#define DRM_IOCTL_BASE 'd'
/* First command number of the driver-specific range. */
#define DRM_COMMAND_BASE 0x40

/* Command numbers, in ascending order (each defined exactly once). */
#define DRM_GEM_CLOSE 0x09
#define DRM_V3D_WAIT_BO (DRM_COMMAND_BASE + 0x01)
#define DRM_V3D_CREATE_BO (DRM_COMMAND_BASE + 0x02)
#define DRM_V3D_MMAP_BO (DRM_COMMAND_BASE + 0x03)
#define DRM_V3D_SUBMIT_CSD (DRM_COMMAND_BASE + 0x07)

/* Full request codes: direction + argument size + type + command number. */
#define IOCTL_GEM_CLOSE _IOW(DRM_IOCTL_BASE, DRM_GEM_CLOSE, gem_close)
#define IOCTL_V3D_WAIT_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_WAIT_BO, drm_v3d_wait_bo)
#define IOCTL_V3D_CREATE_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_CREATE_BO, drm_v3d_create_bo)
#define IOCTL_V3D_MMAP_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_MMAP_BO, drm_v3d_mmap_bo)
#define IOCTL_V3D_SUBMIT_CSD _IOW(DRM_IOCTL_BASE, DRM_V3D_SUBMIT_CSD, drm_v3d_submit_csd)
/*
 * Pre-assembled shader program that performs no work: eight 64-bit
 * instruction words, all nops, three of which also carry a thread-switch
 * (thrsw) signal. The table is only ever read (copied into a buffer object),
 * so it is declared const.
 * NOTE(review): the placement of the thrsw signals is presumably dictated by
 * the hardware's program-termination rules -- confirm against V3D docs.
 */
static const uint64_t do_nothing[] = {
    0x3c203186bb800000, // nop; thrsw
    0x3c203186bb800000, // nop; thrsw
    0x3c003186bb800000, // nop
    0x3c003186bb800000, // nop
    0x3c203186bb800000, // nop; thrsw
    0x3c003186bb800000, // nop
    0x3c003186bb800000, // nop
    0x3c003186bb800000, // nop
};
/*
 * Queue one compute job on the DRM device.
 *
 * fd      - open DRM device file descriptor.
 * phyaddr - value stored in cfg[5]; the caller passes the offset reported
 *           when the shader's buffer object was created.
 * handle  - handle of the single buffer object the job references.
 *
 * Returns whatever ioctl() returns for IOCTL_V3D_SUBMIT_CSD.
 *
 * NOTE(review): the bit layout of the cfg words is presumably the V3D CSD
 * register layout (counts in the upper 16 bits of cfg[0..2], batch /
 * supergroup / size fields packed in cfg[3]) -- confirm against the driver.
 */
static int submit_csd(int fd, uint32_t phyaddr, uint32_t handle) {
    /* A 1 x 1 x 1 dispatch. */
    const uint32_t dim_x = 1;
    const uint32_t dim_y = 1;
    const uint32_t dim_z = 1;
    const uint32_t total = dim_x * dim_y * dim_z;
    const uint32_t per_supergroup = 1;

    /* Number of 16-wide batches needed per supergroup, rounded up. */
    const uint32_t batches = (per_supergroup * total + 16u - 1u) / 16u;

    /* The only buffer object the job touches is the shader itself. */
    const uint32_t referenced[] = { handle };

    drm_v3d_submit_csd job;

    job.cfg[0] = dim_x << 16;
    job.cfg[1] = dim_y << 16;
    job.cfg[2] = dim_z << 16;
    job.cfg[3] = ((batches - 1u) << 12) | (per_supergroup << 8) | (total & 0xff);
    job.cfg[4] = 0;
    job.cfg[5] = phyaddr;
    job.cfg[6] = 0;

    for (unsigned k = 0; k < 4; ++k) {
        job.coef[k] = 0;
    }

    /* The handle list is passed as an integer-encoded pointer plus a count. */
    job.bo_handles = (uintptr_t)referenced;
    job.bo_handle_count = sizeof(referenced) / sizeof(referenced[0]);

    /* No sync objects are used. */
    job.in_sync = 0;
    job.out_sync = 0;

    return ioctl(fd, IOCTL_V3D_SUBMIT_CSD, &job);
}
/*
 * Issue IOCTL_V3D_WAIT_BO for the given buffer object with a timeout value
 * of 10e9 (ten seconds, if the field is in nanoseconds as its name says).
 *
 * fd     - open DRM device file descriptor.
 * handle - handle of the buffer object to wait on.
 *
 * Returns whatever ioctl() returns.
 */
static int wait_bo(int fd, uint32_t handle) {
    /* 10e9 written as an exact integer. */
    const uint64_t timeout = 10000000000ull;

    drm_v3d_wait_bo request;
    request.timeout_ns = timeout;
    request.pad = 0;
    request.handle = handle;

    return ioctl(fd, IOCTL_V3D_WAIT_BO, &request);
}
/*
 * Return the current time of day, as reported by gettimeofday(), in seconds
 * with the microsecond part as the fraction.
 */
static double get_time() {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole = (double)now.tv_sec;
    const double fraction = now.tv_usec * 1e-6;
    return whole + fraction;
}
int main() {
int fd = open("/dev/dri/card0", O_RDWR);
assert(fd > 0);
drm_v3d_create_bo create_bo;
create_bo.size = sizeof(do_nothing);
create_bo.flags = 0;
{
int res = ioctl(fd, IOCTL_V3D_CREATE_BO, &create_bo);
assert(res == 0);
}
uint32_t handle = create_bo.handle;
uint32_t phyaddr = create_bo.offset;
drm_v3d_mmap_bo mmap_bo;
mmap_bo.handle = handle;
mmap_bo.flags = 0;
{
int res = ioctl(fd, IOCTL_V3D_MMAP_BO, &mmap_bo);
assert(res == 0);
}
void* usraddr = mmap(NULL, sizeof(do_nothing), PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmap_bo.offset);
assert(usraddr != MAP_FAILED);
memcpy(usraddr, do_nothing, sizeof(do_nothing));
for (int submit_times = 1; submit_times < 11; ++submit_times) {
printf("[submit x%d]\n", submit_times);
for (int try = 0; try < 5; ++try) {
double start = get_time();
for (int i = 0; i < submit_times; ++i) {
submit_csd(fd, phyaddr, handle);
}
wait_bo(fd, handle);
double end = get_time();
printf(" try %d: %.6lf sec\n", try+1, end - start);
}
}
{
int res = munmap(usraddr, sizeof(do_nothing));
assert(res == 0);
}
gem_close cl;
cl.handle = handle;
ioctl(fd, IOCTL_GEM_CLOSE, &cl);
return 0;
}
[submit x1]
try 1: 0.021083 sec
try 2: 0.029954 sec
try 3: 0.029995 sec
try 4: 0.029996 sec
try 5: 0.039995 sec
[submit x2]
try 1: 0.060004 sec
try 2: 0.059991 sec
try 3: 0.059994 sec
try 4: 0.059995 sec
try 5: 0.059996 sec
[submit x3]
try 1: 0.089996 sec
try 2: 0.089995 sec
try 3: 0.089998 sec
try 4: 0.089993 sec
try 5: 0.089999 sec
[submit x4]
try 1: 0.119992 sec
try 2: 0.119995 sec
try 3: 0.119996 sec
try 4: 0.089998 sec
try 5: 0.119993 sec
[submit x5]
try 1: 0.149996 sec
try 2: 0.149995 sec
try 3: 0.149996 sec
try 4: 0.149995 sec
try 5: 0.149995 sec
[submit x6]
try 1: 0.180001 sec
try 2: 0.179991 sec
try 3: 0.179996 sec
try 4: 0.179997 sec
try 5: 0.179993 sec
[submit x7]
try 1: 0.209996 sec
try 2: 0.209996 sec
try 3: 0.209997 sec
try 4: 0.209994 sec
try 5: 0.209997 sec
[submit x8]
try 1: 0.239995 sec
try 2: 0.239995 sec
try 3: 0.239995 sec
try 4: 0.240000 sec
try 5: 0.239991 sec
[submit x9]
try 1: 0.269993 sec
try 2: 0.269998 sec
try 3: 0.269995 sec
try 4: 0.269996 sec
try 5: 0.269996 sec
[submit x10]
try 1: 0.299996 sec
try 2: 0.299995 sec
try 3: 0.299994 sec
try 4: 0.299996 sec
try 5: 0.299996 sec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment