-
-
Save manodeep/23e5a2cc037752ce861bdb2998314137 to your computer and use it in GitHub Desktop.
C code to reproduce memory race in WCSLIB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <omp.h> | |
#include <assert.h> | |
#include <string.h> | |
#include <time.h> | |
#include "wcslib.h" | |
typedef int (*roundtrip_coords_impls)(const int N, double *pixcrd, double *imgcrd, double *phi, double*theta, double *world, int *stat, double *dest, struct wcsprm *wcs, const int nthreads); | |
int set_wcs(struct wcsprm *wcs) | |
{ | |
//This replicates the wcs setup in astropy issue 16245 | |
assert(wcs != NULL); | |
assert(wcs->naxis == 2); | |
wcs->crpix[0] = -234.75; | |
wcs->crpix[1] = 8.3393; | |
wcs->cdelt[0] = -0.066667; | |
wcs->cdelt[1] = 0.066667; | |
wcs->crval[0] = 0.0; | |
wcs->crval[1] = -90.0; | |
sprintf(wcs->ctype[0], "RA---AIR"); | |
sprintf(wcs->ctype[1], "DEC--AIR"); | |
return wcsset(wcs); | |
} | |
int setup_random_pixcrd(const int N, const double min, const double max, double *pixcrd) | |
{ | |
unsigned int seed = 292364782; | |
for(int i=0;i<N;i++) { | |
double r = (double)rand_r(&seed)/(double)RAND_MAX; | |
pixcrd[i] = r*(max - min) + min; | |
r = (double)rand_r(&seed)/(double)RAND_MAX; | |
pixcrd[i+N] = r*(max - min) + min; | |
} | |
return EXIT_SUCCESS; | |
} | |
int distribute_compute_over_ntasks(const int totn, const int NTasks, const int ThisTask, int *n_thistask, int *start) | |
{ | |
if(ThisTask > NTasks || ThisTask < 0 || NTasks < 1) { | |
fprintf(stderr,"Error: ThisTask = %d and NTasks = %d must satisfy i) ThisTask < NTasks, ii) ThisTask > 0 and iii) NTasks >= 1\n", | |
ThisTask, NTasks); | |
return EXIT_FAILURE; | |
} | |
if(totn < 0) { | |
fprintf(stderr,"Error: On ThisTask = %d: total number = %d must be >= 0\n", ThisTask, totn); | |
return EXIT_FAILURE; | |
} | |
if(totn == 0) { | |
fprintf(stderr,"Warning: Got 0 forests to distribute! Returning\n"); | |
*n_thistask = 0; | |
*start = 0; | |
return EXIT_SUCCESS; | |
} | |
// Assign each task an equal number of compute units. If we can't equally assign each task | |
// the EXACT same number of compute, give each task an extra unit (if required). | |
const int n_per_cpu = totn/NTasks; | |
const int rem_n = totn % NTasks; | |
int n_this_task = n_per_cpu; | |
if(ThisTask < rem_n) { | |
n_this_task++; | |
} | |
int64_t start_num = n_per_cpu * ThisTask; | |
if(ThisTask < rem_n) { | |
start_num += ThisTask; | |
} else { | |
start_num += rem_n; // All tasks that weren't given an extra forest will be offset by a constant amount. | |
} | |
/* Now fill up the destination */ | |
*n_thistask = n_this_task; | |
*start = start_num; | |
return EXIT_SUCCESS; | |
} | |
int roundtrip_coords_buggy(const int N, double *pixcrd, double *imgcrd, double *phi, double *theta, double *world, int *stat, double *dest, struct wcsprm *wcs, const int nthreads) | |
{ | |
const int naxis = wcs->naxis; | |
fprintf(stderr,"Using %s with nthreads = %d\n", __FUNCTION__, nthreads); | |
#pragma omp parallel num_threads(nthreads) shared(wcs, pixcrd, imgcrd, phi, theta, world, stat, dest) | |
{ | |
assert(omp_get_num_threads() == nthreads); | |
const int tid = omp_get_thread_num(); | |
int start, n_thistask; | |
distribute_compute_over_ntasks(N, nthreads, tid, &n_thistask, &start); | |
int end = start + n_thistask; | |
const int offset = 2*start; | |
double *local_pixcrd = pixcrd + offset; | |
double *local_imgcrd = imgcrd + offset; | |
double *local_phi = phi + offset; | |
double *local_theta = theta + offset; | |
double *local_world = world + offset; | |
int *local_stat = stat + offset; | |
double *local_dest = dest + offset; | |
fprintf(stderr,"On thread %d: start = %d, end = %d n_thistask = %d\n", tid, start, end, n_thistask); | |
int status = wcsp2s(wcs, n_thistask, naxis, local_pixcrd, local_imgcrd, local_phi, local_theta, local_world, local_stat); | |
if(status != EXIT_SUCCESS) { | |
fprintf(stderr, "wcsp2s failed with status %d\n", status); | |
} | |
//calling wcsset() causes the race condition (needs nowait to trigger it) | |
wcsset(wcs); | |
//convert back to pixel coordinates | |
status = wcss2p(wcs, n_thistask, naxis, local_world, local_phi, local_theta, local_imgcrd, local_pixcrd, local_stat); | |
if(status != EXIT_SUCCESS) { | |
fprintf(stderr, "wcss2p failed with status %d\n", status); | |
} | |
memcpy(local_dest, local_pixcrd, 2*n_thistask*sizeof(*local_dest)); | |
} | |
fprintf(stderr,"Using %s with nthreads = %d...done\n", __FUNCTION__, nthreads); | |
return EXIT_SUCCESS; | |
} | |
int roundtrip_coords_omp_barrier(const int N, double *pixcrd, double *imgcrd, double *phi, double *theta, double *world, int *stat, double *dest, struct wcsprm *wcs, const int nthreads) | |
{ | |
const int naxis = wcs->naxis; | |
fprintf(stderr,"Using %s with nthreads = %d\n", __FUNCTION__, nthreads); | |
#pragma omp parallel num_threads(nthreads) shared(wcs, pixcrd, imgcrd, phi, theta, world, stat, dest) | |
{ | |
assert(omp_get_num_threads() == nthreads); | |
const int tid = omp_get_thread_num(); | |
int start, n_thistask; | |
distribute_compute_over_ntasks(N, nthreads, tid, &n_thistask, &start); | |
int end = start + n_thistask; | |
const int offset = 2*start; | |
double *local_pixcrd = pixcrd + offset; | |
double *local_imgcrd = imgcrd + offset; | |
double *local_phi = phi + offset; | |
double *local_theta = theta + offset; | |
double *local_world = world + offset; | |
int *local_stat = stat + offset; | |
double *local_dest = dest + offset; | |
fprintf(stderr,"On thread %d: start = %d, end = %d n_thistask = %d\n", tid, start, end, n_thistask); | |
int status = wcsp2s(wcs, n_thistask, naxis, local_pixcrd, local_imgcrd, local_phi, local_theta, local_world, local_stat); | |
if(status != EXIT_SUCCESS) { | |
fprintf(stderr, "wcsp2s failed with status %d\n", status); | |
} | |
//calling wcsset() causes the race condition (needs nowait to trigger it) | |
#pragma omp barrier | |
#pragma omp single | |
wcsset(wcs); | |
#pragma omp barrier | |
//convert back to pixel coordinates | |
status = wcss2p(wcs, n_thistask, naxis, local_world, local_phi, local_theta, local_imgcrd, local_pixcrd, local_stat); | |
if(status != EXIT_SUCCESS) { | |
fprintf(stderr, "wcss2p failed with status %d\n", status); | |
} | |
memcpy(local_dest, local_pixcrd, 2*n_thistask*sizeof(*local_dest)); | |
} | |
fprintf(stderr,"Using %s with nthreads = %d...done\n", __FUNCTION__, nthreads); | |
return EXIT_SUCCESS; | |
} | |
int count_n_mismatches(const int N, const double *input, const double *output) | |
{ | |
int n_mismatches = 0; | |
#pragma omp simd reduction(+:n_mismatches) | |
for(int i=0;i<2*N;i++) { | |
if(fabs(input[i] - output[i]) > 1e-8) { | |
n_mismatches++; | |
// fprintf(stderr, "Mismatch at i=%d: input = %g, output = %g fabs(diff) = %0.10e\n", | |
// i, input[i], output[i], fabs(input[i] - output[i])); | |
} | |
} | |
return n_mismatches/2; | |
} | |
int main(int argc, char **argv) | |
{ | |
int N = 100; | |
int max_numthreads = 1; | |
if(argc >= 2) { | |
N = atoi(argv[1]); | |
} | |
if(argc >= 3) { | |
max_numthreads = atoi(argv[2]); | |
} | |
struct wcsprm wcs; | |
memset(&wcs, 0, sizeof(wcs)); | |
const int naxis = 2; | |
wcsini(1, naxis, &wcs); | |
set_wcs(&wcs); | |
double *pixcrd = malloc(2*N*sizeof(*pixcrd)); | |
double *imgcrd = malloc(2*N*sizeof(*imgcrd)); | |
double *phi = malloc(2*N*sizeof(*phi)); | |
double *theta = malloc(2*N*sizeof(*theta)); | |
double *world = malloc(2*N*sizeof(*world)); | |
int *stat = malloc(2*N*sizeof(*stat)); | |
double *dest = malloc(2*N*sizeof(*dest)); | |
double *pixcrd_orig = malloc(2*N*sizeof(*pixcrd_orig)); | |
if(pixcrd == NULL || dest == NULL || | |
imgcrd == NULL || phi == NULL || | |
theta == NULL || world == NULL || | |
stat == NULL || pixcrd_orig == NULL) | |
{ | |
fprintf(stderr, "Memory allocation failed for N = %d\n", N); | |
return EXIT_FAILURE; | |
} | |
const roundtrip_coords_impls all_functions[] = { &roundtrip_coords_omp_barrier, &roundtrip_coords_buggy}; | |
const char *function_names[72] = { "roundtrip_coords_omp_barrier", "roundtrip_coords_buggy"}; | |
const int n_functions = sizeof(all_functions)/sizeof(all_functions[0]); | |
const double min = -1000.0; | |
const double max = 1000.0; | |
setup_random_pixcrd(N, min, max, pixcrd); | |
memcpy(pixcrd_orig, pixcrd, 2*N*sizeof(*pixcrd)); | |
for(int nthreads=1;nthreads<=max_numthreads;nthreads++) { | |
for(int ifunc=0;ifunc<n_functions;ifunc++) { | |
struct timespec t0, t1; | |
clock_gettime(CLOCK_MONOTONIC, &t0); | |
int status = all_functions[ifunc](N, pixcrd, imgcrd, phi, theta, world, stat, dest, &wcs, nthreads); | |
if(status != EXIT_SUCCESS) { | |
fprintf(stderr, "%s failed with status %d\n", function_names[ifunc], status); | |
return status; | |
} | |
clock_gettime(CLOCK_MONOTONIC, &t1); | |
const double func_time = (t1.tv_sec - t0.tv_sec) + 1e-9*(t1.tv_nsec - t0.tv_nsec); | |
const int n_mismatches = count_n_mismatches(N, pixcrd_orig, dest); | |
fprintf(stderr,"Called function = '%s' with nthreads=%d, N=%d, n_mismatches = %d. Time taken = %g seconds\n", | |
function_names[ifunc], nthreads, N, n_mismatches, func_time); | |
memcpy(pixcrd, pixcrd_orig, 2*N*sizeof(*pixcrd)); | |
} | |
} | |
wcsfree(&wcs); | |
free(pixcrd); | |
free(world); | |
free(theta); | |
free(imgcrd); | |
free(phi); | |
free(dest); | |
free(stat); | |
free(pixcrd_orig); | |
return EXIT_SUCCESS; | |
} | |
//I created the file in the cextern/wcslib/C directory and compiled with: | |
//clang -g -I.. -Wall -Wextra -fsanitize=leak -fsanitize=undefined -fsanitize=bounds -fsanitize=address -fsanitize-undefined-trap-on-error -fstack-protector-all test_wcs_threads.c -std=c99 -O3 -Wl,-no_compact_unwind -fopenmp=libomp libwcs-PIC.a -o test_wcs_threads | |
//Running on a single thread with 1 million points produces same results after roundtrip (tol = 1e-8): | |
/* | |
On thread 0: start = 0, end = 10000000 n_thistask = 10000000 | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=1, N=10000000, n_mismatches = 0. Time taken = 2.9328 seconds | |
On thread 0: start = 0, end = 10000000 n_thistask = 10000000 | |
Called function = 'roundtrip_coords_buggy' with nthreads=1, N=10000000, n_mismatches = 0. Time taken = 2.76603 seconds | |
*/ | |
//Running on multiple threads sometimes produces a crash with double-free, and sometimes the n_mismatches > 0: | |
/* | |
(.venv) [~/sorsery-consulting/2024/reproject-astropy-wcs/astropy/cextern/wcslib/C @HAW10013056] ./test_wcs_threads 100000 2 | |
On thread 0: start = 0, end = 100000 n_thistask = 100000 | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=1, N=100000, n_mismatches = 0. Time taken = 0.029945 seconds | |
On thread 0: start = 0, end = 100000 n_thistask = 100000 | |
Called function = 'roundtrip_coords_buggy' with nthreads=1, N=100000, n_mismatches = 0. Time taken = 0.028093 seconds | |
On thread 0: start = 0, end = 50000 n_thistask = 50000 | |
On thread 1: start = 50000, end = 100000 n_thistask = 50000 | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=2, N=100000, n_mismatches = 95872. Time taken = 0.016187 seconds | |
On thread 0: start = 0, end = 50000 n_thistask = 50000 | |
On thread 1: start = 50000, end = 100000 n_thistask = 50000 | |
================================================================= | |
==67903==ERROR: AddressSanitizer: attempting double-free on 0x0001034001b0 in thread T0: | |
#0 0x100ce19f0 in wrap_free+0x8c (libclang_rt.asan_osx_dynamic.dylib:arm64+0x3d9f0) (BuildId: e7b5fb9640823e93be90c001748b8fc332000000200000000100000000000b00) | |
#1 0x10055822c in linset lin.c:747 | |
#2 0x100574074 in wcsset.part.0 wcs.c:2807 | |
#3 0x10054bc70 in .omp_outlined. test_wcs_threads.c:89 | |
#4 0x10081daf8 in __kmp_invoke_microtask+0x98 (libomp.dylib:arm64+0x81af8) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
0x0001034001b0 is located 0 bytes inside of 16-byte region [0x0001034001b0,0x0001034001c0) | |
freed by thread T1 here: | |
#0 0x100ce19f0 in wrap_free+0x8c (libclang_rt.asan_osx_dynamic.dylib:arm64+0x3d9f0) (BuildId: e7b5fb9640823e93be90c001748b8fc332000000200000000100000000000b00) | |
#1 0x10055822c in linset lin.c:747 | |
#2 0x100574074 in wcsset.part.0 wcs.c:2807 | |
#3 0x10054bc70 in .omp_outlined. test_wcs_threads.c:89 | |
#4 0x10081daf8 in __kmp_invoke_microtask+0x98 (libomp.dylib:arm64+0x81af8) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
previously allocated by thread T0 here: | |
#0 0x100ce1c68 in wrap_calloc+0x90 (libclang_rt.asan_osx_dynamic.dylib:arm64+0x3dc68) (BuildId: e7b5fb9640823e93be90c001748b8fc332000000200000000100000000000b00) | |
#1 0x100558238 in linset lin.c:748 | |
#2 0x100574074 in wcsset.part.0 wcs.c:2807 | |
#3 0x10054c830 in .omp_outlined..13 test_wcs_threads.c:131 | |
#4 0x10081daf8 in __kmp_invoke_microtask+0x98 (libomp.dylib:arm64+0x81af8) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
Thread T1 created by T0 here: | |
#0 0x100cdbc10 in wrap_pthread_create+0x50 (libclang_rt.asan_osx_dynamic.dylib:arm64+0x37c10) (BuildId: e7b5fb9640823e93be90c001748b8fc332000000200000000100000000000b00) | |
#1 0x1007fff10 in __kmp_create_worker+0xcc (libomp.dylib:arm64+0x63f10) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
#2 0x1007be35c in __kmp_allocate_thread+0x410 (libomp.dylib:arm64+0x2235c) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
#3 0x1007b8f34 in __kmp_allocate_team+0x96c (libomp.dylib:arm64+0x1cf34) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
#4 0x1007ba980 in __kmp_fork_call+0x13b4 (libomp.dylib:arm64+0x1e980) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
#5 0x1007ade9c in __kmpc_fork_call+0xa8 (libomp.dylib:arm64+0x11e9c) (BuildId: 583abbdbdb43371e8f4bf9100abf532b32000000200000000100000000000c00) | |
#6 0x10054c220 in roundtrip_coords_omp_barrier test_wcs_threads.c:131 | |
#7 0x10054d354 in main test_wcs_threads.c:233 | |
#8 0x1008b9088 in start+0x204 (dyld:arm64+0x5088) (BuildId: 75627683a78032adae34cf86dd23a26b32000000200000000100000000050c00) | |
#9 0xf12efffffffffffc (<unknown module>) | |
SUMMARY: AddressSanitizer: double-free (libclang_rt.asan_osx_dynamic.dylib:arm64+0x3d9f0) (BuildId: e7b5fb9640823e93be90c001748b8fc332000000200000000100000000000b00) in wrap_free+0x8c | |
==67903==ABORTING | |
Abort trap: 6 | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//(.venv) [~/sorsery-consulting/2024/reproject-astropy-wcs/astropy/cextern/wcslib/C @HAW10013056] ./test_wcs_threads 1000000 4 &>xx.log | |
Using roundtrip_coords_omp_barrier with nthreads = 1 | |
On thread 0: start = 0, end = 1000000 n_thistask = 1000000 | |
Using roundtrip_coords_omp_barrier with nthreads = 1...done | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=1, N=1000000, n_mismatches = 0. Time taken = 0.302789 seconds | |
Using roundtrip_coords_buggy with nthreads = 1 | |
On thread 0: start = 0, end = 1000000 n_thistask = 1000000 | |
Using roundtrip_coords_buggy with nthreads = 1...done | |
Called function = 'roundtrip_coords_buggy' with nthreads=1, N=1000000, n_mismatches = 0. Time taken = 0.280202 seconds | |
Using roundtrip_coords_omp_barrier with nthreads = 2 | |
On thread 1: start = 500000, end = 1000000 n_thistask = 500000 | |
On thread 0: start = 0, end = 500000 n_thistask = 500000 | |
Using roundtrip_coords_omp_barrier with nthreads = 2...done | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=2, N=1000000, n_mismatches = 0. Time taken = 0.149766 seconds | |
Using roundtrip_coords_buggy with nthreads = 2 | |
On thread 0: start = 0, end = 500000 n_thistask = 500000 | |
On thread 1: start = 500000, end = 1000000 n_thistask = 500000 | |
Using roundtrip_coords_buggy with nthreads = 2...done | |
Called function = 'roundtrip_coords_buggy' with nthreads=2, N=1000000, n_mismatches = 0. Time taken = 0.148138 seconds | |
Using roundtrip_coords_omp_barrier with nthreads = 3 | |
On thread 0: start = 0, end = 333334 n_thistask = 333334 | |
On thread 1: start = 333334, end = 666667 n_thistask = 333333 | |
On thread 2: start = 666667, end = 1000000 n_thistask = 333333 | |
Using roundtrip_coords_omp_barrier with nthreads = 3...done | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=3, N=1000000, n_mismatches = 0. Time taken = 0.100863 seconds | |
Using roundtrip_coords_buggy with nthreads = 3 | |
On thread 0: start = 0, end = 333334 n_thistask = 333334 | |
On thread 1: start = 333334, end = 666667 n_thistask = 333333 | |
On thread 2: start = 666667, end = 1000000 n_thistask = 333333 | |
Using roundtrip_coords_buggy with nthreads = 3...done | |
Called function = 'roundtrip_coords_buggy' with nthreads=3, N=1000000, n_mismatches = 0. Time taken = 0.104435 seconds | |
Using roundtrip_coords_omp_barrier with nthreads = 4 | |
On thread 0: start = 0, end = 250000 n_thistask = 250000 | |
On thread 2: start = 500000, end = 750000 n_thistask = 250000 | |
On thread 3: start = 750000, end = 1000000 n_thistask = 250000 | |
On thread 1: start = 250000, end = 500000 n_thistask = 250000 | |
Using roundtrip_coords_omp_barrier with nthreads = 4...done | |
Called function = 'roundtrip_coords_omp_barrier' with nthreads=4, N=1000000, n_mismatches = 0. Time taken = 0.084524 seconds | |
Using roundtrip_coords_buggy with nthreads = 4 | |
On thread 0: start = 0, end = 250000 n_thistask = 250000 | |
On thread 1: start = 250000, end = 500000 n_thistask = 250000 | |
On thread 2: start = 500000, end = 750000 n_thistask = 250000 | |
On thread 3: start = 750000, end = 1000000 n_thistask = 250000 | |
wcsp2s failed with status 8 | |
Using roundtrip_coords_buggy with nthreads = 4...done | |
Called function = 'roundtrip_coords_buggy' with nthreads=4, N=1000000, n_mismatches = 6. Time taken = 0.0774 seconds |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment