Skip to content

Instantly share code, notes, and snippets.

@tylov
Last active June 8, 2024 13:10
Show Gist options
  • Save tylov/8bfb5984864602ad52614f8ddea3544c to your computer and use it in GitHub Desktop.
Save tylov/8bfb5984864602ad52614f8ddea3544c to your computer and use it in GitHub Desktop.
C implementation of Python numpy multidimensional arrays
/*
MIT License
*
* Copyright (c) 2023 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
This is a single header, C implementation of python numpy multidimensional array slicing.
It supports both row- and column-major layout, transposing of md spans, and flat iteration.
cspan.h is also a part of the STC library, https://github.com/stclib/STC
Documentation: https://github.com/stclib/STC/blob/master/docs/cspan_api.md
Examples: https://github.com/stclib/STC/blob/master/docs/cspan_api.md
The python line:
slice = sp4[2][3:][1:5:2][:] # reduces rank from 4 to 3
can be written in C as:
Span3 slice = cspan_slice(Span3, &sp4, {2}, {3,c_END}, {1,5,2}, {c_ALL});
Compile-time check that number of arguments matches type of sp4.
Default runtime argument bounds check and that possible rank-reduction is type-correct.
There are also optimized common slicing constructors (for dimensions 1-4), e.g.:
Span sub = cspan_subspan(&sp, 5, 3); // cspan_slice(Span, &sp, {5,8});
Span3 ms3 = cspan_submd4(&sp3, 1); // cspan_slice(Span3, &sp3, {1}, {c_ALL}, {c_ALL});
Span2 ms2 = cspan_submd4(&sp3, 1, 0); // cspan_slice(Span2, &sp3, {1}, {0}, {c_ALL});
The code generated and the implementation is fast and compact, while being
generic and type-safe. For comparison, C++23 std::mdspan does not support
slicing, subspan, submd, or flattened iteration.
// Example:
#include <stdio.h>
#include <stdlib.h>
#include "cspan.h"
using_cspan3(DSpan, double);
int main(void) {
enum{nx=3, ny=4, nz=5};
double data[nx*ny*nz];
printf("\nMultidim span ms[3, 4, 5], fortran ordered");
DSpan3 ms = cspan_md_layout(c_COLMAJOR, data, nx, ny, nz);
int idx = 0;
for (int i = 0; i < ms.shape[0]; ++i)
for (int j = 0; j < ms.shape[1]; ++j)
for (int k = 0; k < ms.shape[2]; ++k)
*cspan_at(&ms, i, j, k) = ++idx;
cspan_transpose(&ms);
printf(", transposed:\n\n");
cspan_print(DSpan3, ms, "%.2f");
puts("Slicing:");
printf("ms[0, :, :]\n");
cspan_print(DSpan2, cspan_slice(DSpan2, &ms, {0}, {c_ALL}, {c_ALL}), "%g");
printf("ms[:, 0, :]\n");
cspan_print(DSpan2, cspan_slice(DSpan2, &ms, {c_ALL}, {0}, {c_ALL}), "%g");
printf("ms[:, :, 0]\n");
cspan_print(DSpan2, cspan_slice(DSpan2, &ms, {c_ALL}, {c_ALL}, {0}), "%g");
}
*/
#ifndef CSPAN_INDEX_TYPE
#define CSPAN_INDEX_TYPE int32_t
#endif
#ifndef CSPAN_H_INCLUDED
#define CSPAN_H_INCLUDED
#define c_MACRO_OVERLOAD(name, ...) \
c_JOIN(c_JOIN0(name,_), c_NUMARGS(__VA_ARGS__))(__VA_ARGS__)
#define c_JOIN0(a, b) a ## b
#define c_JOIN(a, b) c_JOIN0(a, b)
#define c_EXPAND(...) __VA_ARGS__
#define c_NUMARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N))
#define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args)
#define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#define _c_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
_14, _15, _16, N, ...) N
#define c_foreach(...) c_MACRO_OVERLOAD(c_foreach, __VA_ARGS__)
#define c_foreach_3(it, C, cnt) \
for (C##_iter it = C##_begin(&cnt); it.ref; C##_next(&it))
#define c_foreach_4(it, C, start, finish) \
for (C##_iter it = start, *_endref = (C##_iter*)(finish).ref \
; it.ref != (C##_value*)_endref; C##_next(&it))
#define c_static_assert(b) (int)(1 ? 0 : sizeof(int[(b) ? 1 : -1]))
#define c_arraylen(a) (intptr_t)(sizeof(a)/sizeof 0[a])
#define c_less_unsigned(a, b) ((size_t)(a) < (size_t)(b))
#define c_swap(T, xp, yp) do { T _t = *(xp); *(xp) = *(yp); *(yp) = _t; } while (0)
// ----------------------------------------------------------------
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <stdint.h>
#if !defined i_static && (defined i_header || defined i_implement)
#define CSPAN_API
#else
#define CSPAN_API static inline
#define i_static
#endif
typedef CSPAN_INDEX_TYPE cextent_t, cstride_t;
#define using_cspan(...) c_MACRO_OVERLOAD(using_cspan, __VA_ARGS__)
#define using_cspan_2(Self, T) \
using_cspan_3(Self, T, 1)
#define using_cspan_3(Self, T, RANK) \
typedef T Self##_value; \
typedef struct { \
Self##_value *data; \
cextent_t shape[RANK]; \
cspan_tuple##RANK stride; \
} Self; \
\
typedef struct { \
Self##_value *ref; \
const Self *_s; \
cextent_t pos[RANK]; \
} Self##_iter; \
\
static inline Self Self##_slice_(Self##_value* d, const cextent_t shape[], const cstride_t stri[], \
const intptr_t args[][3], const int rank) { \
Self s; int outrank; \
s.data = d + _cspan_slice(s.shape, s.stride.d, &outrank, shape, stri, args, rank); \
assert(outrank == RANK); \
return s; \
} \
static inline Self##_iter Self##_begin(const Self* self) { \
Self##_iter it = {.ref=self->data, ._s=self}; \
return it; \
} \
static inline Self##_iter Self##_end(const Self* self) { \
Self##_iter it = {0}; (void)self; \
return it; \
} \
static inline void Self##_next(Self##_iter* it) { \
int done; \
it->ref += _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, &done); \
if (done) it->ref = NULL; \
} \
struct stc_nostruct
#define using_cspan2(Self, T) using_cspan_2(Self, T); using_cspan_3(Self##2, T, 2)
#define using_cspan3(Self, T) using_cspan2(Self, T); using_cspan_3(Self##3, T, 3)
#define using_cspan4(Self, T) using_cspan3(Self, T); using_cspan_3(Self##4, T, 4)
#define using_cspan_tuple(N) typedef struct { cstride_t d[N]; } cspan_tuple##N
using_cspan_tuple(1); using_cspan_tuple(2);
using_cspan_tuple(3); using_cspan_tuple(4);
using_cspan_tuple(5); using_cspan_tuple(6);
using_cspan_tuple(7); using_cspan_tuple(8);
// cspan_init: static construction from initialization list
//
#define cspan_init(Span, ...) \
((Span){.data=(Span##_value[])__VA_ARGS__, \
.shape={sizeof((Span##_value[])__VA_ARGS__)/sizeof(Span##_value)}, \
.stride=(cspan_tuple1){.d={1}}})
// cspan_from* pointer+size or c-array
//
#define cspan_from_n(ptr, n) \
{.data=(ptr), \
.shape={(cextent_t)(n)}, \
.stride=(cspan_tuple1){.d={1}}}
#define cspan_from_array(array) \
cspan_from_n(array, c_arraylen(array))
// cspan_subspan on 1d spans
//
#define cspan_subspan(self, offset, count) \
{.data=cspan_at(self, offset), \
.shape={(cextent_t)(count)}, \
.stride=(self)->stride}
// Accessors
//
#define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self))
#define cspan_rank(self) c_arraylen((self)->shape) // constexpr
#define cspan_is_colmajor(self) ((self)->stride.d[0] < (self)->stride.d[cspan_rank(self) - 1])
#define cspan_is_rowmajor(self) (!cspan_is_colmajor(self))
#define cspan_get_layout(self) (cspan_is_colmajor(self) ? c_COLMAJOR : c_ROWMAJOR)
#define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__))
#define cspan_front(self) ((self)->data)
#define cspan_back(self) ((self)->data + cspan_size(self) - 1)
#define cspan_index(self, ...) \
_cspan_index((self)->shape, (self)->stride.d, ((const intptr_t[]){__VA_ARGS__}), \
cspan_rank(self) + c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__)))
// Multi-dimensional span constructors
//
typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout;
#define cspan_md(array, ...) \
cspan_md_layout(c_ROWMAJOR, array, __VA_ARGS__)
#define cspan_md_layout(layout, array, ...) \
{.data=array, \
.shape={__VA_ARGS__}, \
.stride=*(c_JOIN(cspan_tuple,c_NUMARGS(__VA_ARGS__))*) \
_cspan_shape2stride(layout, ((cstride_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))}
// Transpose and swap axes
//
#define cspan_transposed2(self) \
{.data=(self)->data + c_static_assert(cspan_rank(self) == 2), \
.shape={(self)->shape[1], (self)->shape[0]}, \
.stride=(cspan_tuple2){.d={(self)->stride.d[1], (self)->stride.d[0]}}}
#define cspan_transpose(self) \
_cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self))
#define cspan_swap_axes(self, ax1, ax2) \
_cspan_swap_axes((self)->shape, (self)->stride.d, ax1, ax2, cspan_rank(self))
// General slicing function.
//
#define c_END (cextent_t)(((size_t)1 << (sizeof(cextent_t)*8 - 1)) - 1)
#define c_ALL 0,c_END
#define cspan_slice(OutSpan, self, ...) \
OutSpan##_slice_((self)->data, (self)->shape, (self)->stride.d, \
((const intptr_t[][3]){__VA_ARGS__}), cspan_rank(self) + \
c_static_assert(cspan_rank(self) == sizeof((cextent_t[][3]){__VA_ARGS__})/sizeof(cextent_t[3])))
// submd#(): # <= 4 optimized. Reduce rank, like e.g. cspan_slice(Span2, &ms3, {x}, {c_ALL}, {c_ALL});
//
#define cspan_submd2(self, x) \
{.data=cspan_at(self, x, 0), \
.shape={(self)->shape[1]}, \
.stride=(cspan_tuple1){.d={(self)->stride.d[1]}}}
#define cspan_submd3(...) c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__)
#define cspan_submd3_2(self, x) \
{.data=cspan_at(self, x, 0, 0), \
.shape={(self)->shape[1], (self)->shape[2]}, \
.stride=(cspan_tuple2){.d={(self)->stride.d[1], (self)->stride.d[2]}}}
#define cspan_submd3_3(self, x, y) \
{.data=cspan_at(self, x, y, 0), \
.shape={(self)->shape[2]}, \
.stride=(cspan_tuple1){.d={(self)->stride.d[2]}}}
#define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__)
#define cspan_submd4_2(self, x) \
{.data=cspan_at(self, x, 0, 0, 0), \
.shape={(self)->shape[1], (self)->shape[2], (self)->shape[3]}, \
.stride=(cspan_tuple3){.d={(self)->stride.d[1], (self)->stride.d[2], (self)->stride.d[3]}}}
#define cspan_submd4_3(self, x, y) \
{.data=cspan_at(self, x, y, 0, 0), \
.shape={(self)->shape[2], (self)->shape[3]}, \
.stride=(cspan_tuple2){.d={(self)->stride.d[2], (self)->stride.d[3]}}}
#define cspan_submd4_4(self, x, y, z) \
{.data=cspan_at(self, x, y, z, 0), \
.shape={(self)->shape[3]}, \
.stride=(cspan_tuple1){.d={(self)->stride.d[3]}}}
#define cspan_print(...) c_MACRO_OVERLOAD(cspan_print, __VA_ARGS__)
#define cspan_print_3(Span, span, fmt) \
cspan_print_4(Span, span, fmt, stdout)
#define cspan_print_4(Span, span, fmt, fp) \
cspan_print_5(Span, span, fmt, fp, "[]")
#define cspan_print_5(Span, span, fmt, fp, brackets) \
cspan_print_6(Span, span, fmt, fp, brackets, c_EXPAND)
#define cspan_print_6(Span, span, fmt, fp, brackets, field) do { \
const Span _s = span; \
const char *_f = fmt, *_b = brackets; \
FILE* _fp = fp; \
int _w, _max = 0; \
char _res[2][16], _fld[128]; \
c_foreach_3 (_it, Span, _s) { \
_w = snprintf(NULL, 0ULL, _f, field(_it.ref[0])); \
if (_w > _max) _max = _w; \
} \
c_foreach_3 (_it, Span, _s) { \
_cspan_print_assist(_it.pos, _s.shape, cspan_rank(&_s), _res, _b); \
_w = _max + (_it.pos[cspan_rank(&_s) - 1] > 0); \
sprintf(_fld, _f, field(_it.ref[0])); \
fprintf(_fp, "%s%*s%s", _res[0], _w, _fld, _res[1]); \
} \
} while (0)
/* ------------------- PRIVAT DEFINITIONS ------------------- */
static inline intptr_t _cspan_size(const cextent_t shape[], int rank) {
intptr_t size = shape[0];
while (--rank) size *= shape[rank];
return size;
}
static inline void _cspan_swap_axes(cextent_t shape[], cstride_t stride[], int i, int j, int rank) {
(void)rank;
assert(c_less_unsigned(i, rank) & c_less_unsigned(j, rank));
c_swap(cextent_t, shape + i, shape + j);
c_swap(cstride_t, stride + i, stride + j);
}
static inline void _cspan_transpose(cextent_t shape[], cstride_t stride[], int rank) {
for (int i = 0; i < --rank; ++i) {
c_swap(cextent_t, shape + i, shape + rank);
c_swap(cstride_t, stride + i, stride + rank);
}
}
static inline intptr_t _cspan_index(const cextent_t shape[], const cstride_t stride[],
const intptr_t args[], int rank) {
intptr_t off = 0;
(void)shape;
while (rank--) {
assert(c_less_unsigned(args[rank], shape[rank]));
off += args[rank]*stride[rank];
}
return off;
}
CSPAN_API void _cspan_print_assist(cextent_t pos[], const cextent_t shape[], const int rank,
char result[2][16], const char* brackets);
CSPAN_API intptr_t _cspan_next2(cextent_t pos[], const cextent_t shape[], const cstride_t stride[],
int rank, int* done);
#define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0])
#define _cspan_next3 _cspan_next2
#define _cspan_next4 _cspan_next2
#define _cspan_next5 _cspan_next2
#define _cspan_next6 _cspan_next2
#define _cspan_next7 _cspan_next2
#define _cspan_next8 _cspan_next2
CSPAN_API intptr_t _cspan_slice(cextent_t oshape[], cstride_t ostride[], int* orank,
const cextent_t shape[], const cstride_t stride[],
const intptr_t args[][3], int rank);
CSPAN_API cstride_t* _cspan_shape2stride(cspan_layout layout, cstride_t shape[], int rank);
#endif // STC_CSPAN_H_INCLUDED
/* --------------------- IMPLEMENTATION --------------------- */
#if defined(i_implement) || defined(i_static)
CSPAN_API void _cspan_print_assist(cextent_t pos[], const cextent_t shape[], const int rank,
char result[2][16], const char* brackets) {
int n = 0, j = 0, r = rank - 1;
memset(result, 0, 32);
while (n <= r && pos[r - n] == 0) ++n;
if (n) for (; j < rank; ++j)
result[0][j] = j < rank - n ? ' ' : brackets[0];
for (j = 0; r >= 0 && pos[r] + 1 == shape[r]; --r, ++j)
result[1][j] = brackets[1];
n = (j > 0) + (j > 1 /*&& j < rank*/); // newlines
if (brackets[2] && j < rank) result[1][j++] = brackets[2]; // comma
while (n--) result[1][j++] = '\n';
}
CSPAN_API intptr_t _cspan_next2(cextent_t pos[], const cextent_t shape[], const cstride_t stride[],
int r, int* done) {
intptr_t off = stride[--r];
++pos[r];
for (; r && pos[r] == shape[r]; --r) {
pos[r] = 0; ++pos[r - 1];
off += stride[r - 1] - (intptr_t)shape[r]*stride[r];
}
*done = pos[r] == shape[r];
return off;
}
CSPAN_API cstride_t* _cspan_shape2stride(cspan_layout layout, cstride_t shpstri[], int rank) {
int i, inc;
if (layout == c_COLMAJOR) i = 0, inc = 1;
else i = rank - 1, inc = -1;
cstride_t k = 1, s1 = shpstri[i], s2;
shpstri[i] = 1;
while (--rank) {
i += inc;
s2 = shpstri[i];
shpstri[i] = (k *= s1);
s1 = s2;
}
return shpstri;
}
CSPAN_API intptr_t _cspan_slice(cextent_t oshape[], cstride_t ostride[], int* orank,
const cextent_t shape[], const cstride_t stride[],
const intptr_t args[][3], int rank) {
intptr_t end, off = 0;
int i = 0, oi = 0;
for (; i < rank; ++i) {
off += args[i][0]*stride[i];
switch (args[i][1]) {
case 0: assert(c_less_unsigned(args[i][0], shape[i])); continue;
case c_END: end = shape[i]; break;
default: end = args[i][1];
}
oshape[oi] = (cextent_t)(end - args[i][0]);
ostride[oi] = stride[i];
assert((oshape[oi] > 0) & !c_less_unsigned(shape[i], end));
if (args[i][2] > 0) {
ostride[oi] *= (cextent_t)args[i][2];
oshape[oi] = (oshape[oi] - 1)/(cextent_t)args[i][2] + 1;
}
++oi;
}
*orank = oi;
return off;
}
#endif
#undef i_header
#undef i_implement
#undef i_static
@ib00
Copy link

ib00 commented Jun 8, 2024

I am new to this C macro trickery. What is the limitation that prevents you from doing this for a general size?

Could you use another macro to dispatch to cspan_md1, cspan_md2, etc. based on the number of arguments?

BTW, this is a very cool thing that you have accomplished here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment