-
-
Save musamaanjum/f8c1119ea227a6fe63a7d95e7b464aee to your computer and use it in GitHub Desktop.
Test to measure the running time of pagemap_ioctl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#include <time.h> | |
#include <unistd.h> | |
#include <sys/mman.h> | |
#define _GNU_SOURCE | |
#define _OPEN_THREADS | |
#include <pthread.h> | |
#include <errno.h> | |
#include <fcntl.h> | |
#include <stdint.h> | |
#include <sys/ioctl.h> | |
#include <linux/userfaultfd.h> | |
#include <sys/ioctl.h> | |
#include <assert.h> | |
/*
 * LEN() - number of 4 KiB pages covered by a region.
 * @region: any lvalue expression with numeric .start and .end members.
 *
 * The argument is parenthesised so that expressions such as LEN(*p) or
 * LEN(buf[i]) expand correctly.
 */
#define LEN(region) (((region).end - (region).start) / 4096)

/*
 * LOG() - printf-style message prefixed with the PID (in hex) and the name of
 * the calling function.
 *
 * Wrapped in do { } while (0) so that "LOG(...);" is exactly one statement
 * and can be used as the body of an unbraced if/else.
 */
#define LOG(format, ...) \
	do { \
		printf("%x:%s: " format, (unsigned int)getpid(), \
		       __func__ __VA_OPT__(,) __VA_ARGS__); \
	} while (0)

/* File descriptor for the pagemap file; main() opens it. */
int pagemap_fd;

#define MAX_THREAD_COUNT 64	/* upper bound on writer threads */
#define PAGE_SIZE 0x1000	/* page size assumed throughout this file */
#define TEST_TIME 3000.0	/* compared with millisecond timestamps in main() */

/* Set by main() to tell the writer threads to leave their loop. */
static bool finish;
/* Number of writer threads requested on the command line. */
static int nthreads;
/* Totals updated atomically by the writer threads: writes done, ticks spent. */
static volatile long long raw_writes_count, writes_time;
/* Base of the anonymous mapping the writer threads touch. */
static char *mem;
/* Command-line switches: random page order, and reset-while-reading. */
static bool random_access, read_reset;
/*
 * rdtsc() - read a free-running tick counter.
 *
 * On x86 this is the CPU time-stamp counter. On every other architecture a
 * nanosecond reading of CLOCK_MONOTONIC is returned instead, so the file still
 * builds there. Callers must treat the unit as opaque ticks; main() calibrates
 * the tick length against wall-clock time before reporting.
 *
 * Return: the current counter value.
 */
#if defined(__i386__)
static __inline__ unsigned long long rdtsc(void)
{
	unsigned long long ticks;

	/* 0x0f 0x31 is the RDTSC opcode; "=A" receives the EDX:EAX pair. */
	__asm__ volatile (".byte 0x0f, 0x31" : "=A" (ticks));
	return ticks;
}
#elif defined(__x86_64__)
static __inline__ unsigned long long rdtsc(void)
{
	unsigned int low, high;

	/* RDTSC returns the low half in EAX and the high half in EDX. */
	__asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));
	return (unsigned long long)low | ((unsigned long long)high << 32);
}
#else
static __inline__ unsigned long long rdtsc(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (unsigned long long)now.tv_sec * 1000000000ULL +
	       (unsigned long long)now.tv_nsec;
}
#endif
/*
 * struct thread_info - work description handed to one writer thread.
 * @mem:    first byte of the slice of memory this thread writes to
 * @npages: number of PAGE_SIZE pages in that slice
 */
struct thread_info {
	volatile void *mem;
	size_t npages;
};
void *thread_proc(void *data) | |
{ | |
struct thread_info *info = data; | |
volatile unsigned char *m = info->mem; | |
size_t page; | |
unsigned long long t1, t2; | |
page = 0; | |
while (!finish) | |
{ | |
if (random_access) | |
page = rand() % info->npages; | |
else | |
page = (page + 1) % info->npages; | |
t1 = rdtsc(); | |
++*(volatile unsigned int *)(m + page * PAGE_SIZE); | |
t2 = rdtsc(); | |
__atomic_add_fetch (&raw_writes_count, 1, __ATOMIC_RELAXED); | |
__atomic_add_fetch (&writes_time, t2 - t1, __ATOMIC_RELAXED); | |
} | |
return 0; | |
} | |
/*
 * curr_time_ms() - current CLOCK_MONOTONIC reading.
 *
 * Return: the time in milliseconds, with the nanosecond part as a fraction.
 */
double curr_time_ms(void)
{
	struct timespec current_time;

	clock_gettime(CLOCK_MONOTONIC, &current_time);
	return current_time.tv_sec * 1000.0 +
	       current_time.tv_nsec / 1000000.0;
}
/* Tick-to-time conversion factor; main() computes it after the measurement. */
static double rdtsc_c;
////////////////////////////////////////////////////////////////////////////////////////////////
/*
 * userfaultfd feature bits used by this test. Guarded so that a
 * <linux/userfaultfd.h> which already provides them is never redefined.
 */
#ifndef UFFD_FEATURE_WP_UNPOPULATED
#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
#endif
#ifndef UFFD_FEATURE_WP_ASYNC
#define UFFD_FEATURE_WP_ASYNC (1<<15)
#endif
/*
 * Local copy of the PAGEMAP_SCAN interface, used only when the system
 * headers do not already define PAGEMAP_SCAN.
 */
#ifndef PAGEMAP_SCAN
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)

/* Bits are set in flags of the page_region and masks in pm_scan_args */
#define PAGE_IS_WPASYNC (1 << 0)
#define PAGE_IS_WRITTEN (1 << 1)
#define PAGE_IS_FILE (1 << 2)
#define PAGE_IS_PRESENT (1 << 3)
#define PAGE_IS_SWAPPED (1 << 4)
#define PAGE_IS_PFNZERO (1 << 5)

/*
 * struct page_region - Page region with flags
 * @start: Start of the region
 * @end: End of the region (exclusive)
 * @categories: PAGE_IS_* category bitmask for the region
 */
struct page_region {
	__u64 start;
	__u64 end;
	__u64 categories;
};

/* Flags for PAGEMAP_SCAN ioctl */
#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */
#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */

/*
 * struct pm_scan_arg - Pagemap ioctl argument
 * @size: Size of the structure
 * @flags: Flags for the IOCTL
 * @start: Starting address of the region
 * @end: Ending address of the region
 * @walk_end: NOTE(review): presumably the address at which the kernel stopped
 *            walking, written back by the ioctl - confirm against the kernel
 *            UAPI header this layout was copied from
 * @vec: Address of page_region struct array for output
 * @vec_len: Length of the page_region struct array
 * @max_pages: Optional limit for number of returned pages (0 = disabled)
 * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1
 * @category_mask: Skip pages for which any category doesn't match
 * @category_anyof_mask: Skip pages for which no category matches
 * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned
 */
struct pm_scan_arg {
	__u64 size;
	__u64 flags;
	__u64 start;
	__u64 end;
	__u64 walk_end;
	__u64 vec;
	__u64 vec_len;
	__u64 max_pages;
	__u64 category_inverted;
	__u64 category_mask;
	__u64 category_anyof_mask;
	__u64 return_mask;
};
#endif
/*
 * Syscall number of userfaultfd(2). It differs per architecture, so it is
 * only defined here when the system headers have not provided it, and only
 * for architectures whose number is known.
 */
#ifndef __NR_userfaultfd
# if defined(__x86_64__)
#  define __NR_userfaultfd 323
# elif defined(__i386__)
#  define __NR_userfaultfd 374
# elif defined(__aarch64__) || defined(__riscv)
#  define __NR_userfaultfd 282
# else
#  error "__NR_userfaultfd is unknown for this architecture"
# endif
#endif

/* Pagemap file of the calling process. */
#define PAGEMAP "/proc/self/pagemap"

/* File descriptor for PAGEMAP; main() opens it. */
int pagemap_fd;
/* userfaultfd descriptor; init_uffd() creates it. */
int uffd;
static long pagemap_ioctl(void *start, size_t len, void *vec, int vec_len, int flag, | |
int max_pages, long required_mask, long anyof_mask, long excluded_mask, | |
long return_mask) | |
{ | |
struct pm_scan_arg arg; | |
arg.start = (uintptr_t)start; | |
arg.end = (uintptr_t)start + len; | |
arg.vec = (uintptr_t)vec; | |
arg.vec_len = vec_len; | |
arg.flags = flag; | |
arg.size = sizeof(struct pm_scan_arg); | |
arg.max_pages = max_pages; | |
arg.category_mask = required_mask; | |
arg.category_anyof_mask = anyof_mask; | |
arg.category_inverted = excluded_mask; | |
arg.return_mask = return_mask; | |
return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); | |
} | |
int init_uffd(void) | |
{ | |
struct uffdio_api uffdio_api; | |
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); | |
if (uffd == -1) | |
exit(1); | |
uffdio_api.api = UFFD_API; | |
uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | | |
UFFD_FEATURE_WP_HUGETLBFS_SHMEM; | |
if (ioctl(uffd, UFFDIO_API, &uffdio_api)) | |
exit(1); | |
if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || | |
!(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || | |
!(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || | |
!(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) | |
exit(1); | |
return 0; | |
} | |
int wp_init(void *start, size_t size) | |
{ | |
struct uffdio_register uffdio_register; | |
struct uffdio_writeprotect wp; | |
madvise( start, size, MADV_NOHUGEPAGE ); | |
uffdio_register.range.start = (uintptr_t)start; | |
uffdio_register.range.len = size; | |
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; | |
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) | |
exit(1); | |
if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT)) | |
exit(1); | |
wp.range.start = (uintptr_t)start; | |
wp.range.len = size; | |
wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; | |
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) | |
exit(1); | |
return 0; | |
} | |
int wp_free(void *start, size_t size) | |
{ | |
struct uffdio_register uffdio_register; | |
uffdio_register.range.start = (uintptr_t)start; | |
uffdio_register.range.len = size; | |
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; | |
if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) | |
exit(1); | |
return 0; | |
} | |
int wp_addr_range(void *lpBaseAddress, int dwRegionSize) | |
{ | |
struct uffdio_writeprotect wp; | |
wp.range.start = (unsigned long)lpBaseAddress; | |
wp.range.len = dwRegionSize; | |
wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; | |
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) | |
exit(1); | |
return 0; | |
} | |
int wp_addr_range_ioctl(void *start, size_t size) | |
{ | |
int ret; | |
ret = pagemap_ioctl(start, size, NULL, 0, PM_SCAN_WP_MATCHING, | |
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); | |
if (ret < 0) { | |
LOG("%p, %p.\n", start, (void *)size); | |
perror("pagemap_ioctl"); | |
exit(1); | |
} | |
return 0; | |
} | |
struct page_region *buf; | |
int read_reset_dirty(int reset, char *start, size_t len, void **vec, unsigned int *ww_count, | |
unsigned int *granularity) | |
{ | |
struct pm_scan_arg arg; | |
int i, ret; | |
uint64_t addr; | |
arg.start = (uintptr_t)start; | |
arg.end = (uintptr_t)start + len; | |
arg.vec = (uintptr_t)buf; | |
arg.vec_len = *ww_count; | |
arg.flags = 0; | |
if (reset) | |
arg.flags |= PM_SCAN_WP_MATCHING; | |
arg.size = sizeof(struct pm_scan_arg); | |
arg.max_pages = *ww_count; | |
arg.category_mask = PAGE_IS_WRITTEN; | |
arg.category_anyof_mask = 0; | |
arg.category_inverted = 0; | |
arg.return_mask = PAGE_IS_WRITTEN; | |
if (granularity) | |
*granularity = 4096; | |
ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); | |
assert(ret >= 0); | |
*ww_count = 0; | |
for (i = 0; i < ret; i++) { | |
for (addr = buf[i].start; addr != buf[i].end; addr += 0x1000) | |
*vec++ = (void *)(uintptr_t)addr; | |
*ww_count += LEN(buf[i]); | |
} | |
ret = 0; | |
return ret; | |
} | |
/*
 * reset_dirty() - clear the written state of a range.
 * @start: first address of the range
 * @size:  length of the range in bytes
 *
 * Delegates to wp_addr_range_ioctl() and hands its result back unchanged.
 */
int reset_dirty(void *start, size_t size)
{
	const int status = wp_addr_range_ioctl(start, size);

	return status;
}
int main(int argc, char *argv[]) | |
{ | |
unsigned int ww_count; | |
long long ww_total, cycle_count; | |
struct thread_info info[MAX_THREAD_COUNT]; | |
unsigned long long t1, t2, rdtsc_start, rdtsc_end; | |
double start, curr, cycle_start; | |
static void **ww_addr; | |
long long writes_count; | |
unsigned long long wwtot_time, wwreset_time; | |
double rw_delay_ms; | |
unsigned int granularity; | |
unsigned int i; | |
int get_count; | |
size_t npages; | |
pthread_t th; | |
pagemap_fd = open(PAGEMAP, O_RDONLY); | |
if (pagemap_fd < 0) { | |
perror("pagemapfd"); | |
return -EINVAL; | |
} | |
if (init_uffd()) | |
return -1; | |
if (argc < 6) { | |
puts("Usage: win.exe <nthreads> <npages> <watch_delay_ms> <random_access> <read_reset>\n"); | |
return -1; | |
} | |
nthreads = atoi(argv[1]); | |
if (nthreads > MAX_THREAD_COUNT) { | |
LOG("Maximum of %u threads supported.\n", MAX_THREAD_COUNT); | |
return -1; | |
} | |
npages = atoi(argv[2]); | |
if (npages < nthreads || npages % nthreads) { | |
LOG("npages should be > nthreads and evenly divisible by nthreads.\n"); | |
return -1; | |
} | |
rw_delay_ms = atof(argv[3]); | |
random_access = atoi(argv[4]); | |
read_reset = atoi(argv[5]); | |
ww_addr = malloc(sizeof(*ww_addr) * npages); | |
buf = malloc(100000 * sizeof(struct page_region)); | |
mem = mmap(NULL, npages * 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); | |
if (!mem) { | |
perror("Error commit"); | |
return -1; | |
} | |
wp_init(mem, npages * 0x1000); | |
wp_addr_range(mem, npages * 0x1000); | |
for (i = 0; i < nthreads; ++i) { | |
info[i].mem = mem + 0x1000 * i * npages / nthreads; | |
info[i].npages = npages / nthreads; | |
pthread_create(&th, NULL, thread_proc, &info[i]); | |
} | |
get_count = npages; | |
wwreset_time = wwtot_time = 0; | |
curr = start = curr_time_ms(); | |
ww_total = 0; | |
cycle_count = 0; | |
rdtsc_start = rdtsc(); | |
while (curr - start < TEST_TIME) | |
{ | |
cycle_start = curr; | |
ww_count = get_count; | |
t1 = rdtsc(); | |
if (read_reset_dirty(read_reset, mem, npages * PAGE_SIZE, | |
ww_addr, &ww_count, &granularity)) { | |
LOG("GetWriteWatch() failed, GetLastError() %d.\n", errno); | |
return -1; | |
} | |
assert((char *)ww_addr[0] >= mem); | |
ww_total += ww_count; | |
if (!read_reset) | |
{ | |
unsigned long long t1, t2; | |
t1 = rdtsc(); | |
reset_dirty(mem, npages * PAGE_SIZE); | |
t2 = rdtsc(); | |
wwreset_time += t2 - t1; | |
} | |
t2 = rdtsc(); | |
wwtot_time += t2 - t1; | |
curr = curr_time_ms(); | |
while (curr - start < TEST_TIME && curr - cycle_start < rw_delay_ms) { | |
sched_yield(); | |
curr = curr_time_ms(); | |
} | |
++cycle_count; | |
} | |
rdtsc_end = rdtsc(); | |
writes_count = raw_writes_count; | |
finish = true; | |
rdtsc_c = 1000.0 * (curr - start) / (rdtsc_end - rdtsc_start); | |
LOG("Elapsed %.1lfms, cycle_count %llu, writes_count %lld, writes watched %llu.\n", | |
curr - start, cycle_count, writes_count, ww_total); | |
LOG("writes per thread * msec %.3lf, avg. write time %.3lfns, GetWriteWatch() avg %.1lfmcs (reset %.1lf)\n", | |
writes_count / (TEST_TIME * nthreads), 1000.0 * writes_time * rdtsc_c / writes_count, wwtot_time * rdtsc_c / cycle_count, wwreset_time * rdtsc_c / cycle_count); | |
free(buf); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment