Skip to content

Instantly share code, notes, and snippets.

@joaomlneto
Last active October 20, 2018 00:19
Show Gist options
  • Save joaomlneto/0ede391fa2178791b28bc10ef8936400 to your computer and use it in GitHub Desktop.
Save joaomlneto/0ede391fa2178791b28bc10ef8936400 to your computer and use it in GitHub Desktop.
Quirks of `mbind` `MPOL_INTERLEAVE`
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <numa.h>
#include <numaif.h>
/*
 * Prints `name`, a colon, and then the index of every bit that is set in
 * `mask`, separated by spaces and terminated by a newline.
 */
void printmask(char *name, struct bitmask *mask) {
    int bit = 0;

    printf("%s: ", name);
    while (bit < mask->size) {
        if (numa_bitmask_isbitset(mask, bit)) {
            printf("%d ", bit);
        }
        bit++;
    }
    putchar('\n');
}
/*
 * Lists every CPU present in the calling thread's affinity mask together with
 * the NUMA node it belongs to, then prints the set of nodes covered by those
 * CPUs ("worker nodes").
 *
 * Exits with status -1 if libnuma is unusable, the node mask cannot be
 * allocated, or the affinity mask cannot be read.
 */
int main(void) {
    cpu_set_t mask;

    /* numa(3): all other libnuma calls are undefined unless this succeeded. */
    if (numa_available() == -1) {
        fprintf(stderr, "NUMA is not available on this system\n");
        exit(-1);
    }

    struct bitmask *numa_nodes = numa_allocate_nodemask();
    if (numa_nodes == NULL) {
        fprintf(stderr, "numa_allocate_nodemask failed\n");
        exit(-1);
    }

    if (sched_getaffinity(0, sizeof(cpu_set_t), &mask) == -1) {
        perror("sched_getaffinity");
        exit(-1);
    }

    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
    for (long cpu = 0; cpu < nproc; cpu++) {
        if (!CPU_ISSET(cpu, &mask)) {
            continue;
        }
        /* Look the node up once; it is both printed and recorded. */
        int node = numa_node_of_cpu((int)cpu);
        /* cpu is a long, so it must be printed with %ld (was %d). */
        printf("core %ld (NUMA node %d)\n", cpu, node);
        /* numa_node_of_cpu() reports failure as -1; never use that as a bit index. */
        if (node >= 0) {
            numa_bitmask_setbit(numa_nodes, (unsigned int)node);
        }
    }

    printmask("worker nodes", numa_nodes);
    numa_bitmask_free(numa_nodes);
    return 0;
}
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <numa.h>
#include <numaif.h>
#define N ((1<<28) / sizeof(int))
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#define PAGE_MASK (~(PAGE_SIZE - 1))
/*
 * Prints `name`, the bit capacity of `mask`, and then the index of every bit
 * that is set in `mask`, separated by spaces and terminated by a newline.
 */
void printmask(const char *name, struct bitmask *mask) {
    /* mask->size is an unsigned long, so it needs %lu (was %d). */
    printf("%s (size=%lu) ", name, mask->size);
    /* Index with the same type as mask->size to avoid a signed/unsigned compare. */
    for (unsigned long i = 0; i < mask->size; i++) {
        if (numa_bitmask_isbitset(mask, (unsigned int)i)) {
            printf("%lu ", i);
        }
    }
    putchar('\n');
}
/*
 * Runs `cmd` through popen() and copies everything the command writes to its
 * standard output onto our standard output.
 *
 * Terminates the process with exit(-1) if the command cannot be started, if
 * pclose() itself fails, or if the command finishes with a non-zero status.
 */
void print_command(const char *cmd) {
    FILE *fp = popen(cmd, "r");
    if (fp == NULL) {
        perror("popen");
        exit(-1);
    }

    char buf[1024];
    while (fgets(buf, sizeof buf, fp) != NULL) {
        fputs(buf, stdout);
    }

    int status = pclose(fp);
    if (status == -1) {
        /* Only this case sets errno, so only here is perror() meaningful. */
        perror("pclose");
        exit(-1);
    }
    if (status != 0) {
        /* Otherwise pclose() returned the command's wait status: decode it. */
        if (WIFEXITED(status)) {
            fprintf(stderr, "'%s' exited with status %d\n", cmd, WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
            fprintf(stderr, "'%s' was terminated by signal %d\n", cmd, WTERMSIG(status));
        } else {
            fprintf(stderr, "'%s' ended with wait status %d\n", cmd, status);
        }
        exit(-1);
    }
}
/*
 * Prints the output of `numastat -c <pid>` for the current process, wrapped in
 * the ANSI escape sequences "\x1B[32m" (before) and "\x1B[0m" (after).
 */
void print_node_allocations() {
    char command[1024];

    snprintf(command, sizeof command, "numastat -c %d", getpid());

    fputs("\x1B[32m", stdout);
    print_command(command);
    fputs("\x1B[0m", stdout);
}
int main(int argc, char **argv) {
int *a = numa_alloc_local(N * sizeof(int));
size_t len = (N * sizeof(int)) & PAGE_MASK;
unsigned long mymask = *numa_get_mems_allowed()->maskp;
unsigned long maxnode = numa_get_mems_allowed()->size;
// pin thread to core zero
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);
if (sched_setaffinity(syscall(SYS_gettid), sizeof(mask), &mask) < 0) {
perror("sched_setaffinity");
exit(-1);
}
// initialize array
printf("\n\n(1) array allocated on local node\n");
a[0] = 997;
for(size_t i=1; i < N; i++) {
a[i] = a[i-1] * a[i-1] % 1000000000;
}
print_node_allocations();
// attempt to get it to be uniformly interleaved on all nodes
printf("\n\n(2) array interleaved on all nodes\n");
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
printf("doesn't seem to do anything...\n");
// what if we interleave on all but the local node?
printf("\n\n(3) array interleaved on all nodes (except local node)\n");
mymask -= 0x01;
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
// attempt to get it to be uniformly interleaved on all nodes
printf("\n\n(4) array interleaved on all nodes except second node\n");
mymask = mymask + 0x01 - 0x02;
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
printf("interleave will only move data from the nodes that aren't in the mask\n");
printf("there's no way to interleave everything on all nodes with a single `mbind` call\n");
return 0;
}
$ clear; cc mbind_interleave_test.c -g -lnuma && sudo ./a.out
(1) array allocated on local node
Per-node process memory usage (in MBs) for PID 22835 (a.out)
Node 0 Node 1 Node 2 Node 3 Node 4 Node 5 Node 6 Node 7 Total
------ ------ ------ ------ ------ ------ ------ ------ -----
Huge 0 0 0 0 0 0 0 0 0
Heap 0 0 0 0 0 0 0 0 0
Stack 0 0 0 0 0 0 0 0 0
Private 256 0 0 1 0 0 0 0 257
------- ------ ------ ------ ------ ------ ------ ------ ------ -----
Total 256 0 0 1 0 0 0 0 257
(2) array interleaved on all nodes
Per-node process memory usage (in MBs) for PID 22835 (a.out)
Node 0 Node 1 Node 2 Node 3 Node 4 Node 5 Node 6 Node 7 Total
------ ------ ------ ------ ------ ------ ------ ------ -----
Huge 0 0 0 0 0 0 0 0 0
Heap 0 0 0 0 0 0 0 0 0
Stack 0 0 0 0 0 0 0 0 0
Private 256 0 0 1 0 0 0 0 257
------- ------ ------ ------ ------ ------ ------ ------ ------ -----
Total 256 0 0 1 0 0 0 0 257
doesn't seem to do anything...
(3) array interleaved on all nodes (except local node)
Per-node process memory usage (in MBs) for PID 22835 (a.out)
Node 0 Node 1 Node 2 Node 3 Node 4 Node 5 Node 6 Node 7 Total
------ ------ ------ ------ ------ ------ ------ ------ -----
Huge 0 0 0 0 0 0 0 0 0
Heap 0 0 0 0 0 0 0 0 0
Stack 0 0 0 0 0 0 0 0 0
Private 0 37 37 38 37 37 37 37 257
------- ------ ------ ------ ------ ------ ------ ------ ------ -----
Total 0 37 37 38 37 37 37 37 257
(4) array interleaved on all nodes except second node
Per-node process memory usage (in MBs) for PID 22835 (a.out)
Node 0 Node 1 Node 2 Node 3 Node 4 Node 5 Node 6 Node 7 Total
------ ------ ------ ------ ------ ------ ------ ------ -----
Huge 0 0 0 0 0 0 0 0 0
Heap 0 0 0 0 0 0 0 0 0
Stack 0 0 0 0 0 0 0 0 0
Private 37 0 37 38 37 37 37 37 257
------- ------ ------ ------ ------ ------ ------ ------ ------ -----
Total 37 0 37 38 37 37 37 37 257
interleave will only move data from the nodes that aren't in the mask
there's no way to interleave everything on all nodes with a single `mbind` call
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment