TG-Techie/patch_soft_static___muldi3.h Secret

## patch_soft_static___muldi3.h

/*
Copyright 2024 TG-Techie (Jonah Y-M)


-------- About --------

This file adds a soft implementation of the 64-bit multiply helper,
int64_t __muldi3(int64_t, int64_t), which LLVM/Swift/Clang generate calls to when
multiplying two 32-bit integers on the RP2040 microcontroller.

The blink example in the Swift Embedded Examples repository uses a multiply by 100;
this patch was made to show that the problem is fixable.


---------------- NOTICE ----------------
This file contains work derived from the LLVM Project and has been modified; the new
portions and the modified portions of this file  are both licensed under the Apache
License v2.0 with LLVM Exceptions (for simplicity).

please see the full license text at:
https://llvm.org/LICENSE.txt
or
https://spdx.org/licenses/Apache-2.0.html + https://spdx.org/licenses/LLVM-exception.html


-------- Usage --------

Add this file as `swift-embedded-examples/pico-blink/Sources/Support/patch_soft_static___muldi3.h`,

then include it in the same directory's `include/Support.h`; e.g.:
```c
#pragma once

#include <stdint.h>

// patch a bug where some versions of the Swift Dev Toolchain are shipping w/out armv6m runtime support
#include "../patch_soft_static___muldi3.h"
```


-------- Why --------

Some builds of the Swift Nightly/Developer toolchain (supporting Embedded Swift) are
missing armv6m support in the compiler-rt soft float library. This file is a patch.

Notably this bug seemed to be introduced on the April 4th  2024 build,
see: https://github.com/apple/swift-embedded-examples/issues/5#issuecomment-2079176704

*/

// ---- setup ----
#include <stdint.h>
#include <limits.h>

// the RP2040 is a little endian ARMv6-M processor, set the corresponding YUGA macro
#define _YUGA_LITTLE_ENDIAN 1
// "minor" aside: I have no idea what YUGA stands for. If you know what YUGA means,
// please reach out (https://www.linkedin.com/in/tg-techie); I'd be very interested to know.
// It's seemingly  only used in ~4 files in the LLVM project and only in the compiler-rt/lib directory;
// Google searches, Gemini, and ChatGPT also don't seem to know what it means. The earliest
// reference I could find was from around 2009 in here by https://github.com/eocallaghan,
// https://github.com/llvm/llvm-project/blame/7a6cb5febf0449344b82f3335736726b5e15193f/compiler-rt/lib/endianness.h
// Best,
//     TG-Techie

// -------- start derived code --------

// from: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/int_lib.h
// Function attribute that pins the procedure-call standard to "aapcs".
// It is applied to the exported __muldi3 definition further down in this file.
#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))


// from: https://github.com/llvm/llvm-project/blob/dcb6c0d71c8dbb6bb17391c968c3716cfafd3765/compiler-rt/lib/builtins/int_types.h#L25-L26
// 32-bit integer types: si_int is signed, su_int is unsigned.
typedef int32_t si_int;
typedef uint32_t su_int;

// from: https://github.com/llvm/llvm-project/blob/dcb6c0d71c8dbb6bb17391c968c3716cfafd3765/compiler-rt/lib/builtins/int_types.h#L37-L51+L25-L26
// 64-bit integer types: di_int is signed, du_int is unsigned.
typedef int64_t di_int;
typedef uint64_t du_int;

// Overlays a 64-bit signed value (`all`) with its two 32-bit words (`s.low`,
// `s.high`) so each half can be read or written on its own. `low` is unsigned
// and `high` is signed.
typedef union {
  di_int all;
  struct {
    // Member order follows the byte-order macro: when _YUGA_LITTLE_ENDIAN is
    // non-zero `low` is declared first, otherwise `high` is.
#if _YUGA_LITTLE_ENDIAN
    su_int low;
    si_int high;
#else
    si_int high;
    su_int low;
#endif // _YUGA_LITTLE_ENDIAN
  } s;
} dwords;


// from: https://github.com/llvm/llvm-project/blob/dcb6c0d71c8dbb6bb17391c968c3716cfafd3765/compiler-rt/lib/builtins/muldi3.c

// Returns: a * b
// Widening multiply: produces the full double-word product of two unsigned
// single-word operands using only single-word arithmetic. Each operand is split
// into half-words and the four partial products are summed with manual carries.
static di_int __muldsi3(su_int a, su_int b) {
  const int half_width = (int)(sizeof(si_int) * CHAR_BIT) / 2;
  const su_int half_mask = (su_int)~0 >> half_width;

  const su_int a_lo = a & half_mask;
  const su_int a_hi = a >> half_width;
  const su_int b_lo = b & half_mask;
  const su_int b_hi = b >> half_width;

  dwords product;

  // low * low: keep the bottom half-word, carry the top half-word forward.
  product.s.low = a_lo * b_lo;
  su_int carry = product.s.low >> half_width;
  product.s.low &= half_mask;

  // First cross term (a_hi * b_lo) lands one half-word up.
  carry += a_hi * b_lo;
  product.s.low += (carry & half_mask) << half_width;
  product.s.high = carry >> half_width;

  // Second cross term (b_hi * a_lo): pull the upper half-word of the low word
  // back out so it can absorb the new partial product.
  carry = product.s.low >> half_width;
  product.s.low &= half_mask;
  carry += b_hi * a_lo;
  product.s.low += (carry & half_mask) << half_width;
  product.s.high += carry >> half_width;

  // high * high contributes only to the upper word.
  product.s.high += a_hi * b_hi;
  return product.all;
}

// Returns: a * b
// Double-word multiply assembled from single-word pieces: the low words are
// multiplied at full width by __muldsi3, then the two cross products
// (high*low and low*high) are folded into the upper word of the result.
COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) {
  dwords lhs = {.all = a};
  dwords rhs = {.all = b};
  dwords product = {.all = __muldsi3(lhs.s.low, rhs.s.low)};

  // Only the low word of each cross product can affect the double-word result.
  const su_int cross_terms = lhs.s.high * rhs.s.low + lhs.s.low * rhs.s.high;
  product.s.high += cross_terms;
  return product.all;
}

#if defined(__ARM_EABI__)
// The ARM run-time ABI names its 64-bit multiply helper __aeabi_lmul, so export
// __muldi3 under that name as well.
//
// COMPILER_RT_ALIAS normally comes from compiler-rt's int_lib.h, which this
// standalone header does not include. Define it here (same form as upstream's
// ELF definition) so the line below expands to an alias declaration instead of
// failing to compile.
#ifndef COMPILER_RT_ALIAS
#define COMPILER_RT_ALIAS(name, aliasname)                                     \
  COMPILER_RT_ABI __typeof(name) aliasname __attribute__((__alias__(#name)));
#endif
COMPILER_RT_ALIAS(__muldi3, __aeabi_lmul)
#endif

	/*
	Copyright 2024 TG-Techie (Jonah Y-M)


	-------- About --------

	This file adds a soft implementation of the 64-bit multiply helper,
	int64_t __muldi3(int64_t, int64_t), which LLVM/Swift/Clang generate calls to when
	multiplying two 32-bit integers on the RP2040 microcontroller.

	The blink example in the Swift Embedded Examples repository uses a multiply by 100;
	this patch was made to show that the problem is fixable.


	---------------- NOTICE ----------------
	This file contains work derived from the LLVM Project and has been modified; the new
	portions and the modified portions of this file are both licensed under the Apache
	License v2.0 with LLVM Exceptions (for simplicity).

	please see the full license text at:
	https://llvm.org/LICENSE.txt
	or
	https://spdx.org/licenses/Apache-2.0.html + https://spdx.org/licenses/LLVM-exception.html


	-------- Usage --------

	Add this file as `swift-embedded-examples/pico-blink/Sources/Support/patch_soft_static___muldi3.h`,

	then include it in the same directory's `include/Support.h`; e.g.:
	```c
	#pragma once

	#include <stdint.h>

	// patch a bug where some versions of the Swift Dev Toolchain are shipping w/out armv6m runtime support
	#include "../patch_soft_static___muldi3.h"
	```


	-------- Why --------

	Some builds of the Swift Nightly/Developer toolchain (supporting Embedded Swift) are
	missing armv6m support in the compiler-rt soft float library. This file is a patch.

	Notably this bug seemed to be introduced on the April 4th 2024 build,
	see: https://github.com/apple/swift-embedded-examples/issues/5#issuecomment-2079176704

	*/

	// ---- setup ----
	#include <stdint.h>
	#include <limits.h>

	// the RP2040 is a little endian ARMv6-M processor, set the corresponding YUGA macro
	#define _YUGA_LITTLE_ENDIAN 1
	// "minor" aside: I have no idea what YUGA stands for. If you know what YUGA means,
	// please reach out (https://www.linkedin.com/in/tg-techie); I'd be very interested to know.
	// It's seemingly only used in ~4 files in the LLVM project and only in the compiler-rt/lib directory;
	// Google searches, Gemini, and ChatGPT also don't seem to know what it means. The earliest
	// reference I could find was from around 2009 in here by https://github.com/eocallaghan,
	// https://github.com/llvm/llvm-project/blame/7a6cb5febf0449344b82f3335736726b5e15193f/compiler-rt/lib/endianness.h
	// Best,
	// TG-Techie

	// -------- start derived code --------

	// from: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/int_lib.h
	// Function attribute that pins the procedure-call standard to "aapcs".
	// It is applied to the exported __muldi3 definition further down in this file.
	#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))


	// from: https://github.com/llvm/llvm-project/blob/dcb6c0d71c8dbb6bb17391c968c3716cfafd3765/compiler-rt/lib/builtins/int_types.h#L25-L26
	// 32-bit integer types: si_int is signed, su_int is unsigned.
	typedef int32_t si_int;
	typedef uint32_t su_int;

	// from: https://github.com/llvm/llvm-project/blob/dcb6c0d71c8dbb6bb17391c968c3716cfafd3765/compiler-rt/lib/builtins/int_types.h#L37-L51+L25-L26
	// 64-bit integer types: di_int is signed, du_int is unsigned.
	typedef int64_t di_int;
	typedef uint64_t du_int;

	// Overlays a 64-bit signed value (`all`) with its two 32-bit words (`s.low`,
	// `s.high`) so each half can be read or written on its own. `low` is unsigned
	// and `high` is signed.
	typedef union {
	di_int all;
	struct {
	// Member order follows the byte-order macro: when _YUGA_LITTLE_ENDIAN is
	// non-zero `low` is declared first, otherwise `high` is.
	#if _YUGA_LITTLE_ENDIAN
	su_int low;
	si_int high;
	#else
	si_int high;
	su_int low;
	#endif // _YUGA_LITTLE_ENDIAN
	} s;
	} dwords;


	// from: https://github.com/llvm/llvm-project/blob/dcb6c0d71c8dbb6bb17391c968c3716cfafd3765/compiler-rt/lib/builtins/muldi3.c

	// Returns: a * b
	// Widening multiply: produces the full double-word product of two unsigned
	// single-word operands using only single-word arithmetic. Each operand is split
	// into half-words and the four partial products are summed with manual carries.
	static di_int __muldsi3(su_int a, su_int b) {
	  const int half_width = (int)(sizeof(si_int) * CHAR_BIT) / 2;
	  const su_int half_mask = (su_int)~0 >> half_width;

	  const su_int a_lo = a & half_mask;
	  const su_int a_hi = a >> half_width;
	  const su_int b_lo = b & half_mask;
	  const su_int b_hi = b >> half_width;

	  dwords product;

	  // low * low: keep the bottom half-word, carry the top half-word forward.
	  product.s.low = a_lo * b_lo;
	  su_int carry = product.s.low >> half_width;
	  product.s.low &= half_mask;

	  // First cross term (a_hi * b_lo) lands one half-word up.
	  carry += a_hi * b_lo;
	  product.s.low += (carry & half_mask) << half_width;
	  product.s.high = carry >> half_width;

	  // Second cross term (b_hi * a_lo): pull the upper half-word of the low word
	  // back out so it can absorb the new partial product.
	  carry = product.s.low >> half_width;
	  product.s.low &= half_mask;
	  carry += b_hi * a_lo;
	  product.s.low += (carry & half_mask) << half_width;
	  product.s.high += carry >> half_width;

	  // high * high contributes only to the upper word.
	  product.s.high += a_hi * b_hi;
	  return product.all;
	}

	// Returns: a * b
	// Double-word multiply assembled from single-word pieces: the low words are
	// multiplied at full width by __muldsi3, then the two cross products
	// (high*low and low*high) are folded into the upper word of the result.
	COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) {
	  dwords lhs = {.all = a};
	  dwords rhs = {.all = b};
	  dwords product = {.all = __muldsi3(lhs.s.low, rhs.s.low)};

	  // Only the low word of each cross product can affect the double-word result.
	  const su_int cross_terms = lhs.s.high * rhs.s.low + lhs.s.low * rhs.s.high;
	  product.s.high += cross_terms;
	  return product.all;
	}

	#if defined(__ARM_EABI__)
	// The ARM run-time ABI names its 64-bit multiply helper __aeabi_lmul, so export
	// __muldi3 under that name as well.
	//
	// COMPILER_RT_ALIAS normally comes from compiler-rt's int_lib.h, which this
	// standalone header does not include. Define it here (same form as upstream's
	// ELF definition) so the line below expands to an alias declaration instead of
	// failing to compile.
	#ifndef COMPILER_RT_ALIAS
	#define COMPILER_RT_ALIAS(name, aliasname)                                     \
	  COMPILER_RT_ABI __typeof(name) aliasname __attribute__((__alias__(#name)));
	#endif
	COMPILER_RT_ALIAS(__muldi3, __aeabi_lmul)
	#endif