Skip to content

Instantly share code, notes, and snippets.

@robUx4
Created September 26, 2018 14:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robUx4/5b31ad86e5aa179fecd198a5c8ba63cd to your computer and use it in GitHub Desktop.
Save robUx4/5b31ad86e5aa179fecd198a5c8ba63cd to your computer and use it in GitHub Desktop.
vla MSVC issues
From 72c81fbfa53838a9594137b582b10d99ff385141 Mon Sep 17 00:00:00 2001
From: Steve Lhomme <robux4@ycbcr.xyz>
Date: Wed, 26 Sep 2018 16:54:27 +0200
Subject: [PATCH] WIP use _alloca() with MSVC
---
src/cdef.c | 4 ++++
src/ipred.c | 3 +++
src/itx.c | 6 ++++++
src/looprestoration.c | 33 +++++++++++++++++++++++++++++++++
src/recon.c | 4 ++++
src/wedge.c | 6 ++++++
6 files changed, 56 insertions(+)
diff --git a/src/cdef.c b/src/cdef.c
index 0d924fc..e9df832 100644
--- a/src/cdef.c
+++ b/src/cdef.c
@@ -91,7 +91,11 @@ static void cdef_filter_block_c(pixel *const dst, const ptrdiff_t dst_stride,
const int damping, const enum CdefEdgeFlags edges)
{
const ptrdiff_t tmp_stride = 16 >> (w == 4);
+#ifdef _MSC_VER
+ uint16_t *tmp = _alloca(sizeof(*tmp) * (tmp_stride * (h + 4)));
+#else /* !_MSC_VER */
uint16_t tmp[tmp_stride * (h + 4)];
+#endif
uint16_t *tmp2 = tmp + 2 * tmp_stride + 2;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
const uint8_t *const sec_taps = cdef_sec_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
diff --git a/src/ipred.c b/src/ipred.c
index 35975b7..082050e 100644
--- a/src/ipred.c
+++ b/src/ipred.c
@@ -455,6 +455,7 @@ z1_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
assert(angle < 90);
const int dx = dr_intra_derivative[angle];
pixel top_out[(64 + 64) * 2];
+#endif
const pixel *top;
int max_base_x;
const int upsample_above = get_upsample(width + height, 90 - angle, is_sm);
@@ -520,6 +521,7 @@ z2_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
const int upsample_left = get_upsample(width + height, 180 - angle, is_sm);
const int upsample_above = get_upsample(width + height, angle - 90, is_sm);
pixel edge[64 * 2 + 64 * 2 + 1];
+#endif
pixel *const topleft = &edge[height * 2];
if (upsample_above) {
@@ -598,6 +600,7 @@ z3_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
assert(angle > 180);
const int dy = dr_intra_derivative[270 - angle];
pixel left_out[(64 + 64) * 2];
+#endif
const pixel *left;
int max_base_y;
const int upsample_left = get_upsample(width + height, angle - 180, is_sm);
diff --git a/src/itx.c b/src/itx.c
index 41e890a..dc101d4 100644
--- a/src/itx.c
+++ b/src/itx.c
@@ -49,7 +49,13 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
{
int i, j;
const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
+#ifdef _MSC_VER
+ coef *tmp = _alloca(sizeof(*tmp) * (w * h));
+ coef *out = _alloca(sizeof(*out) * (h));
+ coef *in_mem = _alloca(sizeof(*in_mem) * (w));
+#else /* !_MSC_VER */
coef tmp[w * h], out[h], in_mem[w];
+#endif /* !_MSC_VER */
const int is_rect2 = w * 2 == h || h * 2 == w;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
diff --git a/src/looprestoration.c b/src/looprestoration.c
index a08d3a5..3b9dca4 100644
--- a/src/looprestoration.c
+++ b/src/looprestoration.c
@@ -121,14 +121,22 @@ static void wiener_c(pixel *p, const ptrdiff_t p_stride,
{
// padding is 3 pixels above and 3 pixels below
const ptrdiff_t tmp_stride = sizeof(pixel) * (w + 6);
+#ifdef _MSC_VER
+ pixel *tmp = _alloca(sizeof(*tmp) * ((h + 6) * PXSTRIDE(tmp_stride)));
+#else /* !_MSC_VER */
pixel tmp[(h + 6) * PXSTRIDE(tmp_stride)];
+#endif /* !_MSC_VER */
pixel *tmp_ptr = tmp;
padding(tmp, tmp_stride, p, p_stride, lpf, lpf_stride, w, h, edges);
// Values stored between horizontal and vertical filtering don't
// fit in a uint8_t.
+#ifdef _MSC_VER
+ uint16_t *hor = _alloca(sizeof(*hor) * ((h + 6 /*padding*/) * w));
+#else /* !_MSC_VER */
uint16_t hor[(h + 6 /*padding*/) * w];
+#endif /* !_MSC_VER */
uint16_t *hor_ptr = hor;
const int round_bits_h = 3 + (BITDEPTH == 12) * 2;
@@ -412,11 +420,19 @@ static void selfguided_filter(int32_t *dst, const ptrdiff_t dst_stride,
{
const int tmp_stride = w + 6;
// FIXME Replace array with scratch memory
+#ifdef _MSC_VER
+ int32_t *A_ = _alloca(sizeof(*A_) * ((h + 6) * tmp_stride));
+#else /* !_MSC_VER */
int32_t A_[(h + 6) * tmp_stride];
+#endif /* !_MSC_VER */
int32_t *A = A_ + 3 * tmp_stride + 3;
// By inverting A and B after the boxsums, B can be of size coef instead
// of int32_t
+#ifdef _MSC_VER
+ coef *B_ = _alloca(sizeof(*B_) * ((h + 6) * tmp_stride));
+#else /* !_MSC_VER */
coef B_[(h + 6) * tmp_stride];
+#endif /* !_MSC_VER */
coef *B = B_ + 3 * tmp_stride + 3;
const int step = (n == 25) + 1;
@@ -519,13 +535,21 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
{
// padding is 3 pixels above and 3 pixels below
const int tmp_stride = sizeof(pixel) * (w + 6);
+#ifdef _MSC_VER
+ pixel *tmp = _alloca(sizeof(*tmp) * ((h + 6) * PXSTRIDE(tmp_stride)));
+#else /* !_MSC_VER */
pixel tmp[(h + 6) * PXSTRIDE(tmp_stride)];
+#endif /* !_MSC_VER */
padding(tmp, tmp_stride, p, p_stride, lpf, lpf_stride, w, h, edges);
// both r1 and r0 can't be zero
if (!sgr_params[sgr_idx][0]) {
+#ifdef _MSC_VER
+ int32_t *dst = _alloca(sizeof(*dst) * (h * w));
+#else /* !_MSC_VER */
int32_t dst[h * w];
+#endif /* !_MSC_VER */
const int s1 = sgr_params[sgr_idx][3];
selfguided_filter(dst, w, tmp, tmp_stride, w, h, 9, s1);
const int w1 = (1 << 7) - sgr_w[1];
@@ -538,7 +562,11 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
p += PXSTRIDE(p_stride);
}
} else if (!sgr_params[sgr_idx][1]) {
+#ifdef _MSC_VER
+ int32_t *dst = _alloca(sizeof(*dst) * (h * w));
+#else /* !_MSC_VER */
int32_t dst[h * w];
+#endif /* !_MSC_VER */
const int s0 = sgr_params[sgr_idx][2];
selfguided_filter(dst, w, tmp, tmp_stride, w, h, 25, s0);
const int w0 = sgr_w[0];
@@ -551,8 +579,13 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
p += PXSTRIDE(p_stride);
}
} else {
+#ifdef _MSC_VER
+ int32_t *dst0 = _alloca(sizeof(*dst0) * (h * w));
+ int32_t *dst1 = _alloca(sizeof(*dst1) * (h * w));
+#else /* !_MSC_VER */
int32_t dst0[h * w];
int32_t dst1[h * w];
+#endif
const int s0 = sgr_params[sgr_idx][2];
const int s1 = sgr_params[sgr_idx][3];
const int w0 = sgr_w[0];
diff --git a/src/recon.c b/src/recon.c
index 685b9b7..7347715 100644
--- a/src/recon.c
+++ b/src/recon.c
@@ -1248,7 +1248,11 @@ void bytefn(recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize bs,
t->tl_4x4_filter = filter_2d;
} else {
const enum Filter2d filter_2d = b->filter2d;
+#ifdef _MSC_VER
+ coef * const (*tmp) = (coef(**)) t->scratch.compinter;
+#else /* !_MSC_VER */
coef (*tmp)[bw4 * bh4 * 16] = (coef (*)[bw4 * bh4 * 16]) t->scratch.compinter;
+#endif /* !_MSC_VER */
int jnt_weight;
uint8_t *const seg_mask = t->scratch_seg_mask;
const uint8_t *mask;
diff --git a/src/wedge.c b/src/wedge.c
index 114d493..b530892 100644
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -173,9 +173,15 @@ static void fill2d_16x2(uint8_t *dst, const int w, const int h,
const enum BlockSize bs,
const uint8_t (*const master)[64 * 64],
const wedge_code_type *const cb,
+#ifdef _MSC_VER
+ uint8_t (*masks_444)[16][1024],
+ uint8_t (*masks_422)[16][512],
+ uint8_t (*masks_420)[16][256],
+#else /* !_MSC_VER */
uint8_t (*masks_444)[16][w * h],
uint8_t (*masks_422)[16][w * h >> 1],
uint8_t (*masks_420)[16][w * h >> 2],
+#endif
const unsigned signs)
{
uint8_t *ptr = dst;
--
2.17.0.windows.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment