Skip to content

Instantly share code, notes, and snippets.

@bvibber
Last active September 26, 2022 16:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bvibber/9de18c7401a29af62b201d06520beb33 to your computer and use it in GitHub Desktop.
Save bvibber/9de18c7401a29af62b201d06520beb33 to your computer and use it in GitHub Desktop.
F16C test code from gtk4

In Fedora 36 running under Parallels on a Core i9-based Mac, GTK4 apps are failing on an F16C extension instruction used to convert between 32-bit and 16-bit floats for graphics drawing. This extracted code fails in the Parallels VM but works natively on macOS. Linux's /proc/cpuinfo lists the f16c extension as present.

Expected output:

% make && ./borf
make: `borf' is up to date.
orig: -1.000000 128.500000 15000.000000 150000.000000
made small...
half as ints: bc00 5804 7353 7c00
made big again...
post: -1.000000 128.500000 15000.000000 inf
/* fp16i.c
*
* Copyright 2021 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#define HAVE_F16C
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <immintrin.h>
/*
 * CAST_M128I_P(a): cast the pointer expression `a` to the const pointer
 * type passed to _mm_loadl_epi64() in this file.
 *
 * Non-clang MSVC builds use `__m128i const *`; every other compiler uses
 * `__m128i_u const *`.
 *
 * Both the argument and the whole expansion are parenthesized so that an
 * argument such as `p + 1` is cast as a unit, and so that the result can
 * be embedded in a larger expression without precedence surprises.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#define CAST_M128I_P(a) ((__m128i const *) (a))
#else
#define CAST_M128I_P(a) ((__m128i_u const *) (a))
#endif
/*
 * float_to_half4_f16c:
 * @f: four single-precision input values
 * @h: receives the four converted values as raw 16-bit half-float patterns
 *
 * Converts four floats to half precision with the F16C conversion
 * intrinsic. Neither pointer needs any particular alignment: the input is
 * read with an unaligned load and only the low 64 bits of the result
 * vector (four 16-bit values) are written to @h.
 */
void
float_to_half4_f16c (const float f[4],
                     uint16_t    h[4])
{
  const __m128 singles = _mm_loadu_ps (f);

  /* Rounding immediate 0: round to nearest. */
  const __m128i halves = _mm_cvtps_ph (singles, _MM_FROUND_TO_NEAREST_INT);

  _mm_storel_epi64 ((__m128i *) h, halves);
}
/*
 * half_to_float4_f16c:
 * @h: four half-precision values stored as raw 16-bit patterns
 * @f: receives the four converted single-precision values
 *
 * Converts four half-precision values to floats with the F16C conversion
 * intrinsic. Neither pointer needs any particular alignment.
 */
void
half_to_float4_f16c (const uint16_t h[4],
                     float          f[4])
{
  /* Load only the 64 bits (four halves) that @h provides. */
  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
  __m128 s = _mm_cvtph_ps (i);

  /*
   * Use the unaligned store: _mm_store_ps() requires a 16-byte-aligned
   * destination, and a plain `float f[4]` parameter gives no such guarantee.
   */
  _mm_storeu_ps (f, s);
}
/*
 * Round-trips four sample floats through half precision and prints every
 * stage: the original values, the raw 16-bit patterns, and the values
 * after converting back to float.
 */
int main(void) {
  float samples[4] = { -1.0f, 128.5f, 1.5e4f, 1.5e5f /* too big for fp16 */ };
  uint16_t packed[4] = { 0 };

  printf("orig: %f %f %f %f\n", samples[0], samples[1], samples[2], samples[3]);

  /* float -> half */
  float_to_half4_f16c(samples, packed);
  printf("made small...\n");
  printf("half as ints: %x %x %x %x\n", packed[0], packed[1], packed[2], packed[3]);

  /* half -> float, overwriting the original sample buffer */
  half_to_float4_f16c(packed, samples);
  printf("made big again...\n");
  printf("post: %f %f %f %f\n", samples[0], samples[1], samples[2], samples[3]);

  return 0;
}
# Builds the F16C round-trip test program `borf` from borf.c.
# NOTE: recipe lines must begin with a literal TAB character.

.PHONY : clean

# -mf16c lets the compiler accept the F16C conversion intrinsics.
# Rebuild when either the source or this Makefile changes.
borf : borf.c Makefile
	gcc -o borf -mf16c borf.c

# Remove the built binary.
clean :
	rm -f borf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment