Skip to content

Instantly share code, notes, and snippets.

View compare_native_and_external32.c
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <string.h>
#include <mpi.h>
void print_buf(const char* msg, const char *buf, int nbytes,
int start_from, int stop_at, int vals_per_line)
{
@bosilca
bosilca / check_avx.c
Created Sep 26, 2020
Playground for the AVX512 support on KNL / KNC.
View check_avx.c
#include <stdlib.h>
#include <stdio.h>
#include <immintrin.h>
#define OMPI_OP_AVX_HAS_AVX512BW_FLAG 0x00000200
#define OMPI_OP_AVX_HAS_AVX512F_FLAG 0x00000100
#define OMPI_OP_AVX_HAS_AVX2_FLAG 0x00000020
#define OMPI_OP_AVX_HAS_AVX_FLAG 0x00000010
#define OMPI_OP_AVX_HAS_SSE4_1_FLAG 0x00000008
#define OMPI_OP_AVX_HAS_SSE3_FLAG 0x00000004
@bosilca
bosilca / check_coll_names.c
Created Jul 31, 2020
A quick benchmark to evaluate the cost of converting an MPI collective communication name into the corresponding collective identifier. The benchmark does not measure the cost for any particular permutation of the collectives: since they all have the same chance of appearing in the configuration file, I look at the cost of searching for all of them once.
View check_coll_names.c
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <inttypes.h>
extern int mca_coll_base_name_to_colltype(const char* name);
typedef enum COLLTYPE {
ALLGATHER = 0, /* 0 */
ALLGATHERV, /* 1 */
@bosilca
bosilca / cuda_allreduce.cc
Created Nov 13, 2016
Quick example to check the performance of MPI_Allreduce from GPU buffers.
View cuda_allreduce.cc
#include <mpi.h>
#include <cuda_runtime.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
/**
* mpic++ -g -Wall -I/opt/cuda/8.0/include cuda_check.cc -o cuda_check -L/opt/cuda/8.0/lib64 -lcudart
*/
@bosilca
bosilca / gist:64843961946319497da7
Last active Feb 7, 2016
Dump the offsets of the jobid and vpid in an opal_process_name_t. Show the content of each of the jobid, job family, and vpid.
View gist:64843961946319497da7
/* Compile with gcc -Wall orte_offset.c -o orte_offset */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
typedef uint32_t opal_jobid_t;
typedef uint32_t opal_vpid_t;
typedef struct {
opal_jobid_t jobid;
opal_vpid_t vpid;
View gist:1716cbc1e71a3dd0baf4
Index: datatypes.tex
===================================================================
--- datatypes.tex (revision 1835)
+++ datatypes.tex (working copy)
@@ -128,6 +128,7 @@
lb(Typemap) & = & \min_j disp_j , \nonumber \\
ub(Typemap) & = & \max_j (disp_j + \mpicode{sizeof}(type_j)) + \epsilon , \mbox{ and}
\nonumber \\ extent(Typemap) & = & ub(Typemap) - lb(Typemap).
+\label{soft-lb-ub-definition}
\end{eqnarray}