#pragma once
// ============================================================================
// == ./Meta.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
#include <boost/mp11.hpp>
#if BOOST_MP11_VERSION < 107300
// Copyright 2015 Peter Dimov.
//
// Distributed under the Boost Software License, Version 1.0.
//
// Boost Software License - Version 1.0 - August 17th, 2003
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
//
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
namespace boost::mp11
{
namespace detail
{
template<class L2>
struct mp_flatten_impl
{
template<class T>
using fn = mp_if<mp_similar<L2, T>, T, mp_list<T>>;
};
} // namespace detail
template<class L, class L2 = mp_clear<L>>
using mp_flatten = mp_apply<mp_append, mp_push_front<mp_transform_q<detail::mp_flatten_impl<L2>, L>, mp_clear<L>>>;
} // namespace boost::mp11
#endif
namespace llama
{
namespace internal
{
template<typename FromList, template<auto...> class ToList>
struct mp_unwrap_values_into_impl;
template<template<class...> class FromList, typename... Values, template<auto...> class ToList>
struct mp_unwrap_values_into_impl<FromList<Values...>, ToList>
{
using type = ToList<Values::value...>;
};
template<typename FromList, template<auto...> class ToList>
using mp_unwrap_values_into = typename mp_unwrap_values_into_impl<FromList, ToList>::type;
} // namespace internal
} // namespace llama
// ==
// == ./Meta.hpp ==
// ============================================================================
// ============================================================================
// == ./macros.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
#ifdef __INTEL_COMPILER
# error LLAMA has stopped supporting the Intel Classic Compiler after Intel announced its planned deprecation and \
replacement by the Intel LLVM-based compiler. Please migrate to the Intel LLVM-based compiler.
#endif
#if defined(__INTEL_LLVM_COMPILER)
# define LLAMA_INDEPENDENT_DATA _Pragma("ivdep")
#elif defined(__clang__)
# define LLAMA_INDEPENDENT_DATA _Pragma("clang loop vectorize(assume_safety) interleave(assume_safety)")
#elif defined(__GNUC__)
# define LLAMA_INDEPENDENT_DATA _Pragma("GCC ivdep")
#elif defined(_MSC_VER)
# define LLAMA_INDEPENDENT_DATA __pragma(loop(ivdep))
#else
/// May be put in front of a loop statement. Indicates that all (!) data accesses inside the loop are independent, so the
/// loop can be safely vectorized. Example: \code{.cpp}
/// LLAMA_INDEPENDENT_DATA
/// for(int i = 0; i < N; ++i)
/// // because of LLAMA_INDEPENDENT_DATA the compiler knows that a and b
/// // do not overlap and the operation can safely be vectorized
/// a[i] += b[i];
/// \endcode
# define LLAMA_INDEPENDENT_DATA
#endif
#ifndef LLAMA_FORCE_INLINE
# if defined(__NVCC__)
# define LLAMA_FORCE_INLINE __forceinline__
# elif defined(__GNUC__) || defined(__clang__)
# define LLAMA_FORCE_INLINE inline __attribute__((always_inline))
# elif defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_FORCE_INLINE __forceinline
# else
/// Forces the compiler to inline a function annotated with this macro
# define LLAMA_FORCE_INLINE inline
# warning LLAMA_FORCE_INLINE is only defined to "inline" for this compiler
# endif
#endif
#ifndef LLAMA_PRAGMA
# define LLAMA_PRAGMA(tokens) _Pragma(# tokens)
#endif
#ifndef LLAMA_UNROLL
# if defined(__NVCC__) || defined(__clang__) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(unroll __VA_ARGS__)
# elif defined(__GNUG__)
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(GCC unroll __VA_ARGS__)
# elif defined(_MSC_VER)
// MSVC does not support a pragma for unrolling
# define LLAMA_UNROLL(...)
# else
/// Requests the compiler to unroll the loop following this directive. An optional unrolling count may be provided as
/// argument, which must be a constant expression.
# define LLAMA_UNROLL(...)
# warning LLAMA_UNROLL is not implemented for your compiler
# endif
#endif
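// Example (illustrative sketch, not part of the original header): LLAMA_UNROLL is placed in front
// of a loop and may be given a constant unroll count:
//
//     LLAMA_UNROLL(4)
//     for(int i = 0; i < 16; ++i)
//         sum += data[i];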
#ifndef LLAMA_HOST_ACC
# if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# define LLAMA_HOST_ACC __host__ __device__
# elif defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_HOST_ACC
# else
/// Some offloading parallelization language extensions such as CUDA, OpenACC or OpenMP 4.5 need to specify whether a
/// class, struct, function or method "resides" on the host, the accelerator (the offloading device) or both. LLAMA
/// supports this with marking every function needed on an accelerator with `LLAMA_HOST_ACC`.
# define LLAMA_HOST_ACC
# warning LLAMA_HOST_ACC is only defined empty for this compiler
# endif
#endif
#define LLAMA_FN_HOST_ACC_INLINE LLAMA_FORCE_INLINE LLAMA_HOST_ACC
#ifndef LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS
# if defined(__clang__) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) __attribute__((always_inline)) __VA_ARGS__
# elif defined(__GNUC__) || (defined(__NVCC__) && !defined(_MSC_VER))
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) __VA_ARGS__ __attribute__((always_inline))
# elif defined(_MSC_VER)
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) \
__VA_ARGS__ /* FIXME: MSVC cannot combine constexpr and [[msvc::forceinline]] */
# else
# define LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(...) __VA_ARGS__
# warning LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS not defined for this compiler
# endif
#endif
#ifndef LLAMA_LAMBDA_INLINE
/// Gives strong indication to the compiler to inline the attributed lambda.
# define LLAMA_LAMBDA_INLINE LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS()
#endif
/// Suppresses nvcc warning: 'calling a __host__ function from __host__ __device__ function.'
#if defined(__NVCC__) && !defined(__clang__)
# define LLAMA_SUPPRESS_HOST_DEVICE_WARNING _Pragma("nv_exec_check_disable")
#else
# define LLAMA_SUPPRESS_HOST_DEVICE_WARNING
#endif
#if defined(_MSC_VER)
# define LLAMA_FORCE_INLINE_RECURSIVE __pragma(inline_depth(255))
#else
/// Forces the compiler to recursively inline the call hierarchy started by the subsequent function call.
# define LLAMA_FORCE_INLINE_RECURSIVE
#endif
/// Forces a copy of a value. This is useful to prevent ODR usage of constants when compiling for GPU targets.
#define LLAMA_COPY(x) decltype(x)(x)
// TODO(bgruber): clang 10 and 11 fail to compile this currently with the issue described here:
// https://stackoverflow.com/questions/64300832/why-does-clang-think-gccs-subrange-does-not-satisfy-gccs-ranges-begin-functi
// let's try again with clang 12
// Intel LLVM compiler is also using the clang frontend
#if(__has_include(<ranges>) && defined(__cpp_concepts) && !defined(__clang__) && !defined(__INTEL_LLVM_COMPILER))
# define CAN_USE_RANGES 1
#else
# define CAN_USE_RANGES 0
#endif
// ==
// == ./macros.hpp ==
// ============================================================================
// ============================================================================
// == ./Proofs.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// ============================================================================
// == ./ArrayIndexRange.hpp ==
// ==
// #pragma once
// ============================================================================
// == ./ArrayExtents.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// ============================================================================
// == ./Array.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "macros.hpp" // amalgamate: file already expanded
#include <ostream>
#include <tuple>
namespace llama
{
/// Array class like `std::array` but suitable for use with offloading devices like GPUs.
/// \tparam T type of the array elements.
/// \tparam N rank of the array.
template<typename T, std::size_t N>
struct Array
{
using value_type = T;
T element[N > 0 ? N : 1];
LLAMA_FN_HOST_ACC_INLINE constexpr auto size() const
{
return N;
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() -> T*
{
return &element[0];
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() const -> const T*
{
return &element[0];
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() -> T*
{
return &element[N];
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() const -> const T*
{
return &element[N];
}
template<typename IndexType>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](IndexType&& idx) -> T&
{
return element[idx];
}
template<typename IndexType>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](IndexType&& idx) const -> T const&
{
return element[idx];
}
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator==(const Array& a, const Array& b) -> bool
{
for(std::size_t i = 0; i < N; ++i)
if(a.element[i] != b.element[i])
return false;
return true;
}
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator!=(const Array& a, const Array& b) -> bool
{
return !(a == b);
}
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator+(const Array& a, const Array& b) -> Array
{
Array temp{};
for(std::size_t i = 0; i < N; ++i)
temp[i] = a[i] + b[i];
return temp;
}
template<std::size_t I>
constexpr auto get() -> T&
{
return element[I];
}
template<std::size_t I>
constexpr auto get() const -> const T&
{
return element[I];
}
};
template<typename T>
struct Array<T, 0>
{
using value_type = T;
LLAMA_FN_HOST_ACC_INLINE constexpr auto size() const
{
return 0;
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() -> T*
{
return nullptr;
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto begin() const -> const T*
{
return nullptr;
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() -> T*
{
return nullptr;
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto end() const -> const T*
{
return nullptr;
}
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator==(const Array&, const Array&) -> bool
{
return true;
}
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator!=(const Array&, const Array&) -> bool
{
return false;
}
LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator+(const Array&, const Array&) -> Array
{
return {};
}
};
template<typename First, typename... Args>
Array(First, Args... args) -> Array<First, sizeof...(Args) + 1>;
template<typename T, std::size_t N>
auto operator<<(std::ostream& os, const Array<T, N>& a) -> std::ostream&
{
os << "Array{";
bool first = true;
for(auto e : a)
{
if(first)
first = false;
else
os << ", ";
os << e;
}
os << "}";
return os;
}
template<typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE constexpr auto push_front([[maybe_unused]] Array<T, N> a, T v) -> Array<T, N + 1>
{
Array<T, N + 1> r{};
r[0] = v;
if constexpr(N > 0)
for(std::size_t i = 0; i < N; i++)
r[i + 1] = a[i];
return r;
}
template<typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE constexpr auto push_back([[maybe_unused]] Array<T, N> a, T v) -> Array<T, N + 1>
{
Array<T, N + 1> r{};
if constexpr(N > 0)
for(std::size_t i = 0; i < N; i++)
r[i] = a[i];
r[N] = v;
return r;
}
template<typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_back([[maybe_unused]] Array<T, N> a)
{
static_assert(N > 0);
Array<T, N - 1> r{};
if constexpr(N > 1)
for(std::size_t i = 0; i < N - 1; i++)
r[i] = a[i];
return r;
}
template<typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_front([[maybe_unused]] Array<T, N> a)
{
static_assert(N > 0);
Array<T, N - 1> r{};
if constexpr(N > 1)
for(std::size_t i = 0; i < N - 1; i++)
r[i] = a[i + 1];
return r;
}
template<typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE constexpr auto product(Array<T, N> a) -> T
{
T prod = 1;
for(auto s : a)
prod *= s;
return prod;
}
} // namespace llama
namespace std
{
template<typename T, size_t N>
struct tuple_size<llama::Array<T, N>> : integral_constant<size_t, N>
{
};
template<size_t I, typename T, size_t N>
struct tuple_element<I, llama::Array<T, N>>
{
using type = T;
};
} // namespace std
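// Example usage of llama::Array (illustrative sketch, not part of the original header):
//
//     llama::Array a{1, 2, 3};          // deduced as llama::Array<int, 3>
//     auto b = llama::push_back(a, 4);  // llama::Array<int, 4>{1, 2, 3, 4}
//     auto [x, y, z] = a;               // structured bindings via the std::tuple_* specializations
//     auto p = llama::product(a);       // 1 * 2 * 3 == 6
//     std::cout << b << '\n';           // prints "Array{1, 2, 3, 4}"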
// ==
// == ./Array.hpp ==
// ============================================================================
// #include "Meta.hpp" // amalgamate: file already expanded
#include <limits>
#include <type_traits>
namespace llama
{
// TODO(bgruber): make this an alias in C++20, when we have CTAD for aliases
/// Represents a run-time index into the array dimensions.
/// \tparam Dim Compile-time number of dimensions.
template<std::size_t Dim>
struct ArrayIndex : Array<std::size_t, Dim>
{
static constexpr std::size_t rank = Dim;
};
static_assert(
std::is_trivially_default_constructible_v<ArrayIndex<1>>); // so ArrayIndex<1>{} will produce a zeroed
// index. Should hold for all dimensions,
// but just checking for <1> here.
static_assert(std::is_trivially_copy_constructible_v<ArrayIndex<1>>);
static_assert(std::is_trivially_move_constructible_v<ArrayIndex<1>>);
static_assert(std::is_trivially_copy_assignable_v<ArrayIndex<1>>);
static_assert(std::is_trivially_move_assignable_v<ArrayIndex<1>>);
template<typename... Args>
ArrayIndex(Args...) -> ArrayIndex<sizeof...(Args)>;
} // namespace llama
template<size_t N>
struct std::tuple_size<llama::ArrayIndex<N>> : std::integral_constant<size_t, N>
{
};
template<size_t I, size_t N>
struct std::tuple_element<I, llama::ArrayIndex<N>>
{
using type = size_t;
};
namespace llama
{
/// Used as a template argument to \ref ArrayExtents to mark a dynamic extent.
inline constexpr std::size_t dyn = std::numeric_limits<std::size_t>::max();
/// ArrayExtents holding compile and runtime indices. This is conceptually equivalent to the std::extents of
/// std::mdspan. See: https://wg21.link/P0009
template<std::size_t... Sizes>
struct ArrayExtents : Array<typename ArrayIndex<sizeof...(Sizes)>::value_type, ((Sizes == dyn) + ... + 0)>
{
static constexpr std::size_t rank = sizeof...(Sizes);
static constexpr auto rank_dynamic = ((Sizes == dyn) + ... + 0);
static constexpr auto rank_static = rank - rank_dynamic;
using Index = ArrayIndex<rank>;
using value_type = typename Index::value_type;
template<std::size_t I>
LLAMA_FN_HOST_ACC_INLINE constexpr auto get() const
{
using namespace boost::mp11;
using TypeList = mp_list_c<std::size_t, Sizes...>;
constexpr auto extent = mp_at_c<TypeList, I>::value;
if constexpr(extent != dyn)
return extent;
else
return static_cast<const Array<value_type, rank_dynamic>&>(
*this)[+mp_count<mp_take_c<TypeList, I>, mp_size_t<dyn>>::value];
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator[](std::size_t i) const
{
return boost::mp11::mp_with_index<rank>(i, [&](auto ic) { return get<decltype(ic)::value>(); });
}
private:
template<std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE constexpr auto toArray(std::index_sequence<Is...>) const -> Index
{
return {get<Is>()...};
}
public:
LLAMA_FN_HOST_ACC_INLINE constexpr auto toArray() const -> Index
{
return toArray(std::make_index_sequence<rank>{});
}
LLAMA_FN_HOST_ACC_INLINE constexpr operator Index() const
{
return toArray();
}
};
template<>
struct ArrayExtents<>
{
static constexpr std::size_t rank = 0;
static constexpr auto rank_dynamic = 0;
static constexpr auto rank_static = 0;
using Index = ArrayIndex<rank>;
using value_type = typename Index::value_type;
LLAMA_FN_HOST_ACC_INLINE constexpr auto toArray() const -> Index
{
return {};
}
LLAMA_FN_HOST_ACC_INLINE constexpr operator Index() const
{
return toArray();
}
};
template<typename... Args>
ArrayExtents(Args... args) -> ArrayExtents<(Args{}, dyn)...>;
static_assert(std::is_trivially_default_constructible_v<ArrayExtents<1>>);
static_assert(std::is_trivially_copy_constructible_v<ArrayExtents<1>>);
static_assert(std::is_trivially_move_constructible_v<ArrayExtents<1>>);
static_assert(std::is_trivially_copy_assignable_v<ArrayExtents<1>>);
static_assert(std::is_trivially_move_assignable_v<ArrayExtents<1>>);
template<std::size_t... SizesA, std::size_t... SizesB>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(ArrayExtents<SizesA...> a, ArrayExtents<SizesB...> b) -> bool
{
return a.toArray() == b.toArray();
}
template<std::size_t... SizesA, std::size_t... SizesB>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(ArrayExtents<SizesA...> a, ArrayExtents<SizesB...> b) -> bool
{
return !(a == b);
}
template<std::size_t... Sizes>
LLAMA_FN_HOST_ACC_INLINE constexpr auto product(ArrayExtents<Sizes...> e) ->
typename ArrayExtents<Sizes...>::value_type
{
return product(e.toArray());
}
/// N-dimensional ArrayExtents where all values are dynamic.
template<std::size_t N>
using ArrayExtentsDynamic = internal::
mp_unwrap_values_into<boost::mp11::mp_repeat_c<boost::mp11::mp_list_c<std::size_t, dyn>, N>, ArrayExtents>;
/// N-dimensional ArrayExtents where all values are Extent.
template<std::size_t N, std::size_t Extent>
using ArrayExtentsStatic = internal::
mp_unwrap_values_into<boost::mp11::mp_repeat_c<boost::mp11::mp_list_c<std::size_t, Extent>, N>, ArrayExtents>;
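// Example usage of ArrayExtents (illustrative sketch, not part of the original header):
//
//     llama::ArrayExtents extents{128, 256};          // CTAD deduces ArrayExtents<dyn, dyn>
//     llama::ArrayExtentsDynamic<2> same{128, 256};   // equivalent alias
//     llama::ArrayExtents<llama::dyn, 16> mixed{42};  // extent 0 is run time, extent 1 is static
//     static_assert(decltype(mixed)::rank == 2 && decltype(mixed)::rank_dynamic == 1);
//     assert(mixed[0] == 42 && mixed[1] == 16);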
template<std::size_t Dim, typename Func, typename... OuterIndices>
LLAMA_FN_HOST_ACC_INLINE void forEachADCoord(
[[maybe_unused]] ArrayIndex<Dim> adSize,
Func&& func,
OuterIndices... outerIndices)
{
if constexpr(Dim > 0)
for(std::size_t i = 0; i < adSize[0]; i++)
forEachADCoord(ArrayIndex<Dim - 1>{pop_front(adSize)}, std::forward<Func>(func), outerIndices..., i);
else
std::forward<Func>(func)(ArrayIndex<sizeof...(outerIndices)>{outerIndices...});
}
template<std::size_t... Sizes, typename Func>
LLAMA_FN_HOST_ACC_INLINE void forEachADCoord(ArrayExtents<Sizes...> extents, Func&& func)
{
forEachADCoord(extents.toArray(), std::forward<Func>(func));
}
} // namespace llama
template<std::size_t... Sizes>
struct std::tuple_size<llama::ArrayExtents<Sizes...>> : std::integral_constant<std::size_t, sizeof...(Sizes)>
{
};
template<std::size_t I, std::size_t... Sizes>
struct std::tuple_element<I, llama::ArrayExtents<Sizes...>>
{
using type = typename llama::ArrayExtents<Sizes...>::value_type;
};
// ==
// == ./ArrayExtents.hpp ==
// ============================================================================
// ============================================================================
// == ./Core.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "ArrayExtents.hpp" // amalgamate: file already expanded
// #include "Meta.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./RecordCoord.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Meta.hpp" // amalgamate: file already expanded
#include <array>
// #include <ostream> // amalgamate: file already included
// #include <type_traits> // amalgamate: file already included
namespace llama
{
/// Represents a coordinate for a record inside the record dimension tree.
/// \tparam Coords... the compile-time coordinate components.
template<std::size_t... Coords>
struct RecordCoord
{
/// The list of integral coordinates as `boost::mp11::mp_list`.
using List = boost::mp11::mp_list_c<std::size_t, Coords...>;
static constexpr std::size_t front = boost::mp11::mp_front<List>::value;
static constexpr std::size_t back = boost::mp11::mp_back<List>::value;
static constexpr std::size_t size = sizeof...(Coords);
};
template<>
struct RecordCoord<>
{
using List = boost::mp11::mp_list_c<std::size_t>;
static constexpr std::size_t size = 0;
};
template<std::size_t... CoordsA, std::size_t... CoordsB>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(RecordCoord<CoordsA...>, RecordCoord<CoordsB...>)
{
return false;
}
template<std::size_t... Coords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(RecordCoord<Coords...>, RecordCoord<Coords...>)
{
return true;
}
template<std::size_t... CoordsA, std::size_t... CoordsB>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(RecordCoord<CoordsA...> a, RecordCoord<CoordsB...> b)
{
return !(a == b);
}
template<typename T>
inline constexpr bool isRecordCoord = false;
template<std::size_t... Coords>
inline constexpr bool isRecordCoord<RecordCoord<Coords...>> = true;
template<std::size_t... RCs>
auto operator<<(std::ostream& os, RecordCoord<RCs...>) -> std::ostream&
{
os << "RecordCoord<";
bool first = true;
for(auto rc : std::array<std::size_t, sizeof...(RCs)>{RCs...})
{
if(first)
first = false;
else
os << ", ";
os << rc;
}
os << ">";
return os;
}
inline namespace literals
{
/// Literal operator for converting a numeric literal into a \ref RecordCoord.
template<char... Digits>
constexpr auto operator"" _RC()
{
constexpr auto coord = []() constexpr
{
char digits[] = {(Digits - 48)...};
std::size_t acc = 0;
std::size_t powerOf10 = 1;
for(int i = sizeof...(Digits) - 1; i >= 0; i--)
{
acc += digits[i] * powerOf10;
powerOf10 *= 10;
}
return acc;
}
();
return RecordCoord<coord>{};
}
} // namespace literals
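// Example usage of RecordCoord and the _RC literal (illustrative sketch, not part of the
// original header):
//
//     using namespace llama::literals;
//     constexpr auto rc = 1_RC;                        // llama::RecordCoord<1>
//     constexpr auto joined = llama::cat(0_RC, 1_RC);  // llama::RecordCoord<0, 1>
//     static_assert(joined == llama::RecordCoord<0, 1>{});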
/// Converts a type list of integral constants into a \ref RecordCoord.
template<typename L>
using RecordCoordFromList = internal::mp_unwrap_values_into<L, RecordCoord>;
/// Concatenate a set of \ref RecordCoord%s.
template<typename... RecordCoords>
using Cat = RecordCoordFromList<boost::mp11::mp_append<typename RecordCoords::List...>>;
/// Concatenate a set of \ref RecordCoord%s instances.
template<typename... RecordCoords>
constexpr auto cat(RecordCoords...)
{
return Cat<RecordCoords...>{};
}
/// RecordCoord without first coordinate component.
template<typename RecordCoord>
using PopFront = RecordCoordFromList<boost::mp11::mp_pop_front<typename RecordCoord::List>>;
namespace internal
{
template<typename First, typename Second>
struct RecordCoordCommonPrefixIsBiggerImpl;
template<std::size_t... Coords1, std::size_t... Coords2>
struct RecordCoordCommonPrefixIsBiggerImpl<RecordCoord<Coords1...>, RecordCoord<Coords2...>>
{
static constexpr auto value = []() constexpr
{
// CTAD does not work if Coords1/2 is an empty pack
std::array<std::size_t, sizeof...(Coords1)> a1{Coords1...};
std::array<std::size_t, sizeof...(Coords2)> a2{Coords2...};
for(std::size_t i = 0; i < std::min(a1.size(), a2.size()); i++)
{
if(a1[i] > a2[i])
return true;
if(a1[i] < a2[i])
return false;
}
return false;
}
();
};
} // namespace internal
/// Checks whether the first RecordCoord is bigger than the second (compared along their common prefix).
template<typename First, typename Second>
inline constexpr auto RecordCoordCommonPrefixIsBigger
= internal::RecordCoordCommonPrefixIsBiggerImpl<First, Second>::value;
namespace internal
{
template<typename First, typename Second>
struct RecordCoordCommonPrefixIsSameImpl;
template<std::size_t... Coords1, std::size_t... Coords2>
struct RecordCoordCommonPrefixIsSameImpl<RecordCoord<Coords1...>, RecordCoord<Coords2...>>
{
static constexpr auto value = []() constexpr
{
// CTAD does not work if Coords1/2 is an empty pack
std::array<std::size_t, sizeof...(Coords1)> a1{Coords1...};
std::array<std::size_t, sizeof...(Coords2)> a2{Coords2...};
for(std::size_t i = 0; i < std::min(a1.size(), a2.size()); i++)
if(a1[i] != a2[i])
return false;
return true;
}
();
};
} // namespace internal
/// Checks whether two \ref RecordCoord%s are the same or one is the prefix of the other.
template<typename First, typename Second>
inline constexpr auto RecordCoordCommonPrefixIsSame
= internal::RecordCoordCommonPrefixIsSameImpl<First, Second>::value;
} // namespace llama
// ==
// == ./RecordCoord.hpp ==
// ============================================================================
#include <boost/core/demangle.hpp>
#include <iostream>
#include <string>
// #include <type_traits> // amalgamate: file already included
namespace llama
{
/// Anonymous naming for a \ref Field.
struct NoName
{
};
/// A type list of \ref Field%s which may be used to define a record dimension.
template<typename... Fields>
struct Record
{
};
/// @brief Tells whether the given type is allowed as a field type in LLAMA. Such types need to be trivially
/// constructible and trivially destructible.
template<typename T>
inline constexpr bool isAllowedFieldType = std::is_trivially_destructible_v<T>;
/// Record dimension tree node which may either be a leaf or refer to a child tree presented as another \ref
/// Record.
/// \tparam Tag Name of the node. May be any type (struct, class).
/// \tparam Type Type of the node. May be one of three cases: 1. another subtree given as a nested \ref
/// Record. 2. an array of static size of any type, in which case a Record with as many \ref Field%s as the array
/// size is created, each named by a \ref RecordCoord specialized on the consecutive numbers I. 3. a scalar type
/// different from \ref Record, making this node a leaf of that type.
template<typename Tag, typename Type>
struct Field
{
static_assert(isAllowedFieldType<Type>, "This field's type is not allowed");
};
struct NrAndOffset
{
std::size_t nr;
std::size_t offset;
friend auto operator==(const NrAndOffset& a, const NrAndOffset& b) -> bool
{
return a.nr == b.nr && a.offset == b.offset;
}
friend auto operator!=(const NrAndOffset& a, const NrAndOffset& b) -> bool
{
return !(a == b);
}
friend auto operator<<(std::ostream& os, const NrAndOffset& value) -> std::ostream&
{
return os << "NrAndOffset{" << value.nr << ", " << value.offset << "}";
}
};
/// Get the tag from a \ref Field.
template<typename Field>
using GetFieldTag = boost::mp11::mp_first<Field>;
/// Get the type from a \ref Field.
template<typename Field>
using GetFieldType = boost::mp11::mp_second<Field>;
template<typename T>
inline constexpr auto isRecord = false;
template<typename... Fields>
inline constexpr auto isRecord<Record<Fields...>> = true;
namespace internal
{
template<typename RecordDim, typename RecordCoord>
struct GetTagsImpl;
template<typename... Fields, std::size_t FirstCoord, std::size_t... Coords>
struct GetTagsImpl<Record<Fields...>, RecordCoord<FirstCoord, Coords...>>
{
using Field = boost::mp11::mp_at_c<boost::mp11::mp_list<Fields...>, FirstCoord>;
using ChildTag = GetFieldTag<Field>;
using ChildType = GetFieldType<Field>;
using type
= boost::mp11::mp_push_front<typename GetTagsImpl<ChildType, RecordCoord<Coords...>>::type, ChildTag>;
};
template<typename ChildType, std::size_t Count, std::size_t FirstCoord, std::size_t... Coords>
struct GetTagsImpl<ChildType[Count], RecordCoord<FirstCoord, Coords...>>
{
using ChildTag = RecordCoord<FirstCoord>;
using type
= boost::mp11::mp_push_front<typename GetTagsImpl<ChildType, RecordCoord<Coords...>>::type, ChildTag>;
};
template<typename T>
struct GetTagsImpl<T, RecordCoord<>>
{
using type = boost::mp11::mp_list<>;
};
} // namespace internal
/// Get the tags of all \ref Field%s from the root of the record dimension tree down to the node identified by
/// \ref RecordCoord.
template<typename RecordDim, typename RecordCoord>
using GetTags = typename internal::GetTagsImpl<RecordDim, RecordCoord>::type;
namespace internal
{
template<typename RecordDim, typename RecordCoord>
struct GetTagImpl
{
using type = boost::mp11::mp_back<GetTags<RecordDim, RecordCoord>>;
};
template<typename RecordDim>
struct GetTagImpl<RecordDim, RecordCoord<>>
{
using type = NoName;
};
} // namespace internal
/// Get the tag of the \ref Field at a \ref RecordCoord inside the record dimension tree.
template<typename RecordDim, typename RecordCoord>
using GetTag = typename internal::GetTagImpl<RecordDim, RecordCoord>::type;
/// Is true if, starting at two coordinates in two record dimensions, all subsequent nodes in the record dimension
/// tree have the same tag.
/// \tparam RecordDimA First record dimension.
/// \tparam LocalA \ref RecordCoord based on RecordDimA along which the tags are compared.
/// \tparam RecordDimB Second record dimension.
/// \tparam LocalB \ref RecordCoord based on RecordDimB along which the tags are compared.
template<typename RecordDimA, typename LocalA, typename RecordDimB, typename LocalB>
inline constexpr auto hasSameTags = []() constexpr
{
if constexpr(LocalA::size != LocalB::size)
return false;
else if constexpr(LocalA::size == 0 && LocalB::size == 0)
return true;
else
return std::is_same_v<GetTags<RecordDimA, LocalA>, GetTags<RecordDimB, LocalB>>;
}
();
namespace internal
{
template<typename FieldList, typename Tag>
struct FindFieldByTag
{
template<typename Field>
using HasTag = std::is_same<GetFieldTag<Field>, Tag>;
static constexpr auto value = boost::mp11::mp_find_if<FieldList, HasTag>::value;
};
template<typename RecordDim, typename RecordCoord, typename... Tags>
struct GetCoordFromTagsImpl
{
static_assert(boost::mp11::mp_size<RecordDim>::value != 0, "Tag combination is not valid");
};
template<typename... Fields, std::size_t... ResultCoords, typename FirstTag, typename... Tags>
struct GetCoordFromTagsImpl<Record<Fields...>, RecordCoord<ResultCoords...>, FirstTag, Tags...>
{
static constexpr auto tagIndex = FindFieldByTag<boost::mp11::mp_list<Fields...>, FirstTag>::value;
static_assert(
tagIndex < sizeof...(Fields),
"FirstTag was not found inside this Record. Does your record dimension contain the tag you access "
"with?");
using ChildType = GetFieldType<boost::mp11::mp_at_c<Record<Fields...>, tagIndex>>;
using type =
typename GetCoordFromTagsImpl<ChildType, RecordCoord<ResultCoords..., tagIndex>, Tags...>::type;
};
template<
typename ChildType,
std::size_t Count,
std::size_t... ResultCoords,
typename FirstTag,
typename... Tags>
struct GetCoordFromTagsImpl<ChildType[Count], RecordCoord<ResultCoords...>, FirstTag, Tags...>
{
static_assert(isRecordCoord<FirstTag>, "Please use a RecordCoord<I> to index into static arrays");
static_assert(FirstTag::size == 1, "Expected RecordCoord with 1 coordinate");
static_assert(FirstTag::front < Count, "Index out of bounds");
using type =
typename GetCoordFromTagsImpl<ChildType, RecordCoord<ResultCoords..., FirstTag::front>, Tags...>::type;
};
template<typename RecordDim, typename RecordCoord>
struct GetCoordFromTagsImpl<RecordDim, RecordCoord>
{
using type = RecordCoord;
};
// unpack a list of tags
template<typename... Fields, typename... Tags>
struct GetCoordFromTagsImpl<Record<Fields...>, RecordCoord<>, boost::mp11::mp_list<Tags...>>
: GetCoordFromTagsImpl<Record<Fields...>, RecordCoord<>, Tags...>
{
};
template<typename ChildType, std::size_t Count, typename... Tags>
struct GetCoordFromTagsImpl<ChildType[Count], RecordCoord<>, boost::mp11::mp_list<Tags...>>
: GetCoordFromTagsImpl<ChildType[Count], RecordCoord<>, Tags...>
{
};
} // namespace internal
/// Converts a series of tags, or a list of tags, navigating down a record dimension into a \ref RecordCoord.
template<typename RecordDim, typename... Tags>
using GetCoordFromTags = typename internal::GetCoordFromTagsImpl<RecordDim, RecordCoord<>, Tags...>::type;
namespace internal
{
template<typename RecordDim, typename... RecordCoordOrTags>
struct GetTypeImpl
{
using type = typename GetTypeImpl<RecordDim, GetCoordFromTags<RecordDim, RecordCoordOrTags...>>::type;
};
template<typename... Children, std::size_t HeadCoord, std::size_t... TailCoords>
struct GetTypeImpl<Record<Children...>, RecordCoord<HeadCoord, TailCoords...>>
{
using ChildType = GetFieldType<boost::mp11::mp_at_c<Record<Children...>, HeadCoord>>;
using type = typename GetTypeImpl<ChildType, RecordCoord<TailCoords...>>::type;
};
template<typename ChildType, std::size_t N, std::size_t HeadCoord, std::size_t... TailCoords>
struct GetTypeImpl<ChildType[N], RecordCoord<HeadCoord, TailCoords...>>
{
using type = typename GetTypeImpl<ChildType, RecordCoord<TailCoords...>>::type;
};
template<typename T>
struct GetTypeImpl<T, RecordCoord<>>
{
static_assert(isAllowedFieldType<T>);
using type = T;
};
} // namespace internal
/// Returns the type of a node in a record dimension tree identified by a given \ref RecordCoord or a series of
/// tags.
template<typename RecordDim, typename... RecordCoordOrTags>
using GetType = typename internal::GetTypeImpl<RecordDim, RecordCoordOrTags...>::type;
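// Example (illustrative sketch, not part of the original header; the tag structs and the record
// dimension Particle are made up for this example):
//
//     struct X{}; struct Y{}; struct Pos{}; struct Vel{}; struct Mass{};
//     using Vec = llama::Record<llama::Field<X, float>, llama::Field<Y, float>>;
//     using Particle = llama::Record<
//         llama::Field<Pos, Vec>,
//         llama::Field<Vel, Vec>,
//         llama::Field<Mass, float>>;
//     static_assert(std::is_same_v<llama::GetCoordFromTags<Particle, Vel, Y>, llama::RecordCoord<1, 1>>);
//     static_assert(std::is_same_v<llama::GetType<Particle, llama::RecordCoord<1, 1>>, float>);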
namespace internal
{
template<typename RecordDim, typename RecordCoord>
struct LeafRecordCoordsImpl;
template<typename T, std::size_t... RCs>
struct LeafRecordCoordsImpl<T, RecordCoord<RCs...>>
{
using type = boost::mp11::mp_list<RecordCoord<RCs...>>;
};
template<typename... Fields, std::size_t... RCs>
struct LeafRecordCoordsImpl<Record<Fields...>, RecordCoord<RCs...>>
{
template<std::size_t... Is>
static auto help(std::index_sequence<Is...>)
{
return boost::mp11::mp_append<
typename LeafRecordCoordsImpl<GetFieldType<Fields>, RecordCoord<RCs..., Is>>::type...>{};
}
using type = decltype(help(std::make_index_sequence<sizeof...(Fields)>{}));
};
template<typename Child, std::size_t N, std::size_t... RCs>
struct LeafRecordCoordsImpl<Child[N], RecordCoord<RCs...>>
{
template<std::size_t... Is>
static auto help(std::index_sequence<Is...>)
{
return boost::mp11::mp_append<
typename LeafRecordCoordsImpl<Child, RecordCoord<RCs..., Is>>::type...>{};
}
using type = decltype(help(std::make_index_sequence<N>{}));
};
} // namespace internal
/// Returns a flat type list containing all record coordinates to all leaves of the given record dimension.
template<typename RecordDim>
using LeafRecordCoords = typename internal::LeafRecordCoordsImpl<RecordDim, RecordCoord<>>::type;
namespace internal
{
// adapted from boost::mp11, but with LLAMA_FN_HOST_ACC_INLINE
template<template<typename...> typename L, typename... T, typename F>
LLAMA_FN_HOST_ACC_INLINE constexpr void mp_for_each_inlined(L<T...>, F&& f)
{
using A = int[sizeof...(T)];
(void) A{((void) f(T{}), 0)...};
}
} // namespace internal
/// Iterates over the record dimension tree and calls a functor on each element.
/// \param functor Functor to execute at each element. Needs to have `operator()` with a template parameter for
/// the \ref RecordCoord in the record dimension tree.
/// \param baseCoord \ref RecordCoord at which the iteration should be started. The functor is called on elements
/// beneath this coordinate.
template<typename RecordDim, typename Functor, std::size_t... Coords>
LLAMA_FN_HOST_ACC_INLINE constexpr void forEachLeafCoord(Functor&& functor, RecordCoord<Coords...> baseCoord)
{
LLAMA_FORCE_INLINE_RECURSIVE
internal::mp_for_each_inlined(
LeafRecordCoords<GetType<RecordDim, RecordCoord<Coords...>>>{},
[&](auto innerCoord) LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(constexpr)
{ std::forward<Functor>(functor)(cat(baseCoord, innerCoord)); });
}
/// Iterates over the record dimension tree and calls a functor on each element.
/// \param functor Functor to execute at each element. Needs to have `operator()` with a template parameter for
/// the \ref RecordCoord in the record dimension tree.
/// \param baseTags Tags used to define where the iteration should be started. The functor is called on elements
/// beneath this coordinate.
template<typename RecordDim, typename Functor, typename... Tags>
LLAMA_FN_HOST_ACC_INLINE constexpr void forEachLeafCoord(Functor&& functor, Tags... /*baseTags*/)
{
LLAMA_FORCE_INLINE_RECURSIVE
forEachLeafCoord<RecordDim>(std::forward<Functor>(functor), GetCoordFromTags<RecordDim, Tags...>{});
}
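// Example usage of forEachLeafCoord (illustrative sketch, not part of the original header;
// Particle and Vel refer to the hypothetical record dimension sketched above):
//
//     llama::forEachLeafCoord<Particle>([](auto rc) {
//         // called with RecordCoord<0, 0>, <0, 1>, <1, 0>, <1, 1> and <2>
//     });
//     llama::forEachLeafCoord<Particle>([](auto rc) {
//         // called with RecordCoord<1, 0> and <1, 1> only
//     }, Vel{});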
namespace internal
{
template<typename T>
struct FlattenRecordDimImpl
{
using type = boost::mp11::mp_list<T>;
};
template<typename... Fields>
struct FlattenRecordDimImpl<Record<Fields...>>
{
using type = boost::mp11::mp_append<typename FlattenRecordDimImpl<GetFieldType<Fields>>::type...>;
};
template<typename Child, std::size_t N>
struct FlattenRecordDimImpl<Child[N]>
{
using type = boost::mp11::mp_repeat_c<typename FlattenRecordDimImpl<Child>::type, N>;
};
} // namespace internal
/// Returns a flat type list containing all leaf field types of the given record dimension.
template<typename RecordDim>
using FlatRecordDim = typename internal::FlattenRecordDimImpl<RecordDim>::type;
/// The total number of fields in the recursively expanded record dimension.
template<typename RecordDim>
inline constexpr std::size_t flatFieldCount = 1;
template<typename... Children>
inline constexpr std::size_t flatFieldCount<
Record<Children...>> = (flatFieldCount<GetFieldType<Children>> + ... + 0);
template<typename Child, std::size_t N>
inline constexpr std::size_t flatFieldCount<Child[N]> = flatFieldCount<Child>* N;
namespace internal
{
template<std::size_t I, typename RecordDim>
inline constexpr std::size_t flatFieldCountBefore = 0;
template<typename... Children>
inline constexpr std::size_t flatFieldCountBefore<0, Record<Children...>> = 0;
// recursive formulation to benefit from template instantiation memoization
// this massively improves compilation time when this template is instantiated with a lot of different I
template<std::size_t I, typename... Children>
inline constexpr std::size_t flatFieldCountBefore<
I,
Record<
Children...>> = flatFieldCountBefore<I - 1, Record<Children...>> + flatFieldCount<GetFieldType<boost::mp11::mp_at_c<Record<Children...>, I - 1>>>;
} // namespace internal
/// The equivalent zero-based index into a flat record dimension (\ref FlatRecordDim) of the given hierarchical
/// record coordinate.
template<typename RecordDim, typename RecordCoord>
inline constexpr std::size_t flatRecordCoord = 0;
template<typename T>
inline constexpr std::size_t flatRecordCoord<T, RecordCoord<>> = 0;
template<typename... Children, std::size_t I, std::size_t... Is>
inline constexpr std::size_t flatRecordCoord<
Record<Children...>,
RecordCoord<
I,
Is...>> = internal::
flatFieldCountBefore<
I,
Record<
Children...>> + flatRecordCoord<GetFieldType<boost::mp11::mp_at_c<Record<Children...>, I>>, RecordCoord<Is...>>;
template<typename Child, std::size_t N, std::size_t I, std::size_t... Is>
inline constexpr std::size_t flatRecordCoord<Child[N], RecordCoord<I, Is...>> = flatFieldCount<Child>* I
+ flatRecordCoord<Child, RecordCoord<Is...>>;
namespace internal
{
template<typename TypeList>
constexpr auto flatAlignOfImpl()
{
using namespace boost::mp11;
std::size_t maxAlign = 0;
mp_for_each<mp_transform<mp_identity, TypeList>>([&](auto e) constexpr
{
using T = typename decltype(e)::type;
maxAlign = std::max(maxAlign, alignof(T));
});
return maxAlign;
}
} // namespace internal
/// The alignment of a type list if its elements would be in a normal struct.
template<typename TypeList>
inline constexpr std::size_t flatAlignOf = internal::flatAlignOfImpl<TypeList>();
/// The alignment of a type T.
template<typename T>
inline constexpr std::size_t alignOf = alignof(T);
/// The alignment of a record dimension if its fields would be in a normal struct.
template<typename... Fields>
inline constexpr std::size_t alignOf<Record<Fields...>> = flatAlignOf<FlatRecordDim<Record<Fields...>>>;
namespace internal
{
constexpr void roundUpToMultiple(std::size_t& value, std::size_t multiple)
{
value = ((value + multiple - 1) / multiple) * multiple;
}
template<typename TypeList, bool Align, bool IncludeTailPadding>
constexpr auto sizeOfImpl() -> std::size_t
{
using namespace boost::mp11;
std::size_t size = 0;
std::size_t maxAlign = 0;
mp_for_each<mp_transform<mp_identity, TypeList>>([&](auto e) constexpr
{
using T = typename decltype(e)::type;
if constexpr(Align)
{
roundUpToMultiple(size, alignof(T));
maxAlign = std::max(maxAlign, alignof(T));
}
// NOLINTNEXTLINE(readability-misleading-indentation)
size += sizeof(T);
});
// final padding, so next struct can start right away
if constexpr(Align && IncludeTailPadding)
roundUpToMultiple(size, maxAlign); // TODO(bgruber): we could use flatAlignOf<TypeList> here, at the
// cost of more template instantiations
return size;
}
template<bool Align, typename TypeList, std::size_t I>
constexpr auto offsetOfImplWorkaround() -> std::size_t;
// recursive formulation to benefit from template instantiation memoization
// this massively improves compilation time when this template is instantiated with a lot of different I
template<bool Align, typename TypeList, std::size_t I>
inline constexpr std::size_t offsetOfImpl
= offsetOfImplWorkaround<Align, TypeList, I>(); // FIXME: MSVC fails to compile an IILE here.
template<bool Align, typename TypeList>
inline constexpr std::size_t offsetOfImpl<Align, TypeList, 0> = 0;
template<bool Align, typename TypeList, std::size_t I>
constexpr auto offsetOfImplWorkaround() -> std::size_t
{
std::size_t offset = offsetOfImpl<Align, TypeList, I - 1> + sizeof(boost::mp11::mp_at_c<TypeList, I - 1>);
if constexpr(Align)
roundUpToMultiple(offset, alignof(boost::mp11::mp_at_c<TypeList, I>));
return offset;
}
} // namespace internal
/// The size of a type list if its elements would be in a normal struct.
template<typename TypeList, bool Align, bool IncludeTailPadding = true>
inline constexpr std::size_t flatSizeOf = internal::sizeOfImpl<TypeList, Align, IncludeTailPadding>();
/// The size of a type T.
template<typename T, bool Align = false, bool IncludeTailPadding = true>
inline constexpr std::size_t sizeOf = sizeof(T);
/// The size of a record dimension if its fields would be in a normal struct.
template<typename... Fields, bool Align, bool IncludeTailPadding>
inline constexpr std::size_t sizeOf<Record<Fields...>, Align, IncludeTailPadding> = flatSizeOf<
FlatRecordDim<Record<Fields...>>,
Align,
IncludeTailPadding>;
/// The byte offset of an element in a type list if its elements would be in a normal struct.
template<typename TypeList, std::size_t I, bool Align>
inline constexpr std::size_t flatOffsetOf = internal::offsetOfImpl<Align, TypeList, I>;
/// The byte offset of an element in a record dimension if it would be a normal struct.
/// \tparam RecordDim Record dimension tree.
/// \tparam RecordCoord Record coordinate of an element in the record dimension tree.
template<typename RecordDim, typename RecordCoord, bool Align = false>
inline constexpr std::size_t offsetOf
= flatOffsetOf<FlatRecordDim<RecordDim>, flatRecordCoord<RecordDim, RecordCoord>, Align>;
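// Example (illustrative sketch, not part of the original header; Particle is the hypothetical
// record dimension sketched above):
//
//     static_assert(llama::flatFieldCount<Particle> == 5);
//     static_assert(llama::flatRecordCoord<Particle, llama::RecordCoord<1, 1>> == 3);
//     static_assert(llama::sizeOf<Particle> == 5 * sizeof(float));  // packed, no alignment
//     static_assert(llama::offsetOf<Particle, llama::RecordCoord<2>> == 4 * sizeof(float));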
template<typename S>
auto structName(S = {}) -> std::string
{
auto s = boost::core::demangle(typeid(S).name());
if(const auto pos = s.rfind(':'); pos != std::string::npos)
s = s.substr(pos + 1);
return s;
}
namespace internal
{
template<typename T>
struct IndirectValue
{
T value;
auto operator->() -> T*
{
return &value;
}
auto operator->() const -> const T*
{
return &value;
}
};
// TODO(bgruber): replace in C++20
template<class T>
struct IsBoundedArray : std::false_type
{
};
template<class T, std::size_t N>
struct IsBoundedArray<T[N]> : std::true_type
{
};
} // namespace internal
/// Returns the integral n rounded up to be a multiple of mult.
template<typename Integral>
LLAMA_FN_HOST_ACC_INLINE constexpr auto roundUpToMultiple(Integral n, Integral mult) -> Integral
{
return (n + mult - 1) / mult * mult;
}
namespace internal
{
template<typename T, template<typename> typename TypeFunctor>
struct TransformLeavesImpl
{
using type = TypeFunctor<T>;
};
template<typename... Fields, template<typename> typename TypeFunctor>
struct TransformLeavesImpl<Record<Fields...>, TypeFunctor>
{
using type = Record<
Field<GetFieldTag<Fields>, typename TransformLeavesImpl<GetFieldType<Fields>, TypeFunctor>::type>...>;
};
template<typename Child, std::size_t N, template<typename> typename TypeFunctor>
struct TransformLeavesImpl<Child[N], TypeFunctor>
{
using type = typename TransformLeavesImpl<Child, TypeFunctor>::type[N];
};
} // namespace internal
/// Creates a new record dimension where each new leaf field's type is the result of applying FieldTypeFunctor to
/// the original leaf field's type.
template<typename RecordDim, template<typename> typename FieldTypeFunctor>
using TransformLeaves = typename internal::TransformLeavesImpl<RecordDim, FieldTypeFunctor>::type;
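// Example usage of TransformLeaves (illustrative sketch, not part of the original header;
// Particle is the hypothetical record dimension sketched above):
//
//     template<typename T>
//     using ToDouble = double;  // type functor mapping every leaf type to double
//     using ParticleD = llama::TransformLeaves<Particle, ToDouble>;
//     // ParticleD has the same tags and structure as Particle, but every leaf field is a double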
namespace internal
{
// TODO: we might implement this better by expanding a record dim into a list of tag lists and then computing a
// real set union of the two lists of tag lists
template<typename A, typename B>
auto mergeRecordDimsImpl(boost::mp11::mp_identity<A> a, boost::mp11::mp_identity<B>)
{
static_assert(std::is_same_v<A, B>, "Cannot merge record and non-record or fields with different types");
return a;
}
template<typename A, std::size_t NA, typename B, std::size_t NB>
auto mergeRecordDimsImpl(
[[maybe_unused]] boost::mp11::mp_identity<A[NA]> a,
[[maybe_unused]] boost::mp11::mp_identity<B[NB]> b)
{
static_assert(std::is_same_v<A, B>, "Cannot merge arrays of different type");
if constexpr(NA < NB)
return b;
else
return a;
}
template<typename... FieldsA>
auto mergeRecordDimsImpl(boost::mp11::mp_identity<Record<FieldsA...>> a, boost::mp11::mp_identity<Record<>>)
{
return a;
}
template<
typename... FieldsA,
typename FieldB,
typename... FieldsB,
auto pos = FindFieldByTag<Record<FieldsA...>, GetFieldTag<FieldB>>::value>
auto mergeRecordDimsImpl(
boost::mp11::mp_identity<Record<FieldsA...>>,
boost::mp11::mp_identity<Record<FieldB, FieldsB...>>)
{
using namespace boost::mp11;
if constexpr(pos == sizeof...(FieldsA))
{
return mergeRecordDimsImpl(
mp_identity<Record<FieldsA..., FieldB>>{},
mp_identity<Record<FieldsB...>>{});
}
else
{
using OldFieldA = mp_at_c<Record<FieldsA...>, pos>;
using NewFieldA = Field<
GetFieldTag<OldFieldA>,
typename decltype(mergeRecordDimsImpl(
mp_identity<GetFieldType<OldFieldA>>{},
mp_identity<GetFieldType<FieldB>>{}))::type>;
using NewRecordA = mp_replace_at_c<Record<FieldsA...>, pos, NewFieldA>;
return mergeRecordDimsImpl(mp_identity<NewRecordA>{}, mp_identity<Record<FieldsB...>>{});
}
}
} // namespace internal
/// Creates a merged record dimension, where duplicated, nested fields are unified.
template<typename RecordDimA, typename RecordDimB>
using MergedRecordDims = typename decltype(internal::mergeRecordDimsImpl(
boost::mp11::mp_identity<RecordDimA>{},
boost::mp11::mp_identity<RecordDimB>{}))::type;
/// Returns the tags interspersed by '.' represented by the given record coord in the given record dimension.
template<typename RecordDim, std::size_t... Coords>
auto recordCoordTags(RecordCoord<Coords...>) -> std::string
{
using Tags = GetTags<RecordDim, RecordCoord<Coords...>>;
std::string r;
boost::mp11::mp_for_each<Tags>(
[&](auto tag)
{
using Tag = decltype(tag);
if(!r.empty())
r += '.';
if constexpr(isRecordCoord<Tag>)
{
static_assert(Tag::size == 1);
r += std::to_string(Tag::front); // handle array indices
}
else
r += structName(tag);
});
return r;
}
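// Example usage of recordCoordTags (illustrative sketch, not part of the original header;
// Particle is the hypothetical record dimension sketched above):
//
//     auto tags = llama::recordCoordTags<Particle>(llama::RecordCoord<1, 1>{});  // "Vel.Y"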
} // namespace llama
// ==
// == ./Core.hpp ==
// ============================================================================
#include <algorithm>
#include <iterator>
// #include <limits> // amalgamate: file already included
#if CAN_USE_RANGES
# include <ranges>
#endif
namespace llama
{
/// Iterator supporting \ref ArrayIndexRange.
template<typename ArrayExtents>
struct ArrayIndexIterator
{
static_assert(!std::is_const_v<ArrayExtents>);
using value_type = typename ArrayExtents::Index;
using difference_type = std::ptrdiff_t;
using reference = value_type;
using pointer = internal::IndirectValue<value_type>;
using iterator_category = std::random_access_iterator_tag;
static constexpr std::size_t rank = ArrayExtents::rank;
constexpr ArrayIndexIterator() noexcept = default;
LLAMA_FN_HOST_ACC_INLINE constexpr ArrayIndexIterator(ArrayExtents extents, value_type current) noexcept
: extents(extents)
, current(current)
{
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator*() const noexcept -> value_type
{
return current;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator->() const noexcept -> pointer
{
return {**this};
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator++() noexcept -> ArrayIndexIterator&
{
current[rank - 1]++;
for(auto i = static_cast<int>(rank) - 2; i >= 0; i--)
{
if(current[i + 1] != extents[i + 1])
return *this;
current[i + 1] = 0;
current[i]++;
}
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator++(int) noexcept -> ArrayIndexIterator
{
auto tmp = *this;
++*this;
return tmp;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator--() noexcept -> ArrayIndexIterator&
{
current[rank - 1]--;
for(auto i = static_cast<int>(rank) - 2; i >= 0; i--)
{
if(current[i + 1] != std::numeric_limits<std::size_t>::max())
return *this;
current[i + 1] = extents[i] - 1;
current[i]--;
}
// decrementing beyond [0, 0, ..., 0] is UB
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator--(int) noexcept -> ArrayIndexIterator
{
auto tmp = *this;
--*this;
return tmp;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator[](difference_type i) const noexcept -> reference
{
return *(*this + i);
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator+=(difference_type n) noexcept -> ArrayIndexIterator&
{
// add n to all lower dimensions with carry
for(auto i = static_cast<int>(rank) - 1; i > 0 && n != 0; i--)
{
n += static_cast<difference_type>(current[i]);
const auto s = static_cast<difference_type>(extents[i]);
auto mod = n % s;
n /= s;
if(mod < 0)
{
mod += s;
n--;
}
current[i] = mod;
assert(current[i] < extents[i]);
}
current[0] = static_cast<difference_type>(current[0]) + n;
// current is either within bounds or at the end ([last + 1, 0, 0, ..., 0])
assert(
(current[0] < extents[0]
|| (current[0] == extents[0]
&& std::all_of(std::begin(current) + 1, std::end(current), [](auto c) { return c == 0; })))
&& "Iterator was moved past the end");
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator+(ArrayIndexIterator it, difference_type n) noexcept -> ArrayIndexIterator
{
it += n;
return it;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator+(difference_type n, ArrayIndexIterator it) noexcept -> ArrayIndexIterator
{
return it + n;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator-=(difference_type n) noexcept -> ArrayIndexIterator&
{
return operator+=(-n);
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator-(ArrayIndexIterator it, difference_type n) noexcept -> ArrayIndexIterator
{
it -= n;
return it;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator-(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept
-> difference_type
{
assert(a.extents == b.extents);
difference_type n = a.current[rank - 1] - b.current[rank - 1];
difference_type size = a.extents[rank - 1];
for(auto i = static_cast<int>(rank) - 2; i >= 0; i--)
{
n += (a.current[i] - b.current[i]) * size;
size *= a.extents[i];
}
return n;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator==(
const ArrayIndexIterator<ArrayExtents>& a,
const ArrayIndexIterator<ArrayExtents>& b) noexcept -> bool
{
assert(a.extents == b.extents);
return a.current == b.current;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator!=(
const ArrayIndexIterator<ArrayExtents>& a,
const ArrayIndexIterator<ArrayExtents>& b) noexcept -> bool
{
return !(a == b);
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator<(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool
{
assert(a.extents == b.extents);
return std::lexicographical_compare(
std::begin(a.current),
std::end(a.current),
std::begin(b.current),
std::end(b.current));
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator>(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool
{
return b < a;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator<=(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool
{
return !(a > b);
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator>=(const ArrayIndexIterator& a, const ArrayIndexIterator& b) noexcept -> bool
{
return !(a < b);
}
private:
ArrayExtents extents; // TODO(bgruber): we only need to store rank - 1 sizes
value_type current;
};
/// Range allowing to iterate over all indices in an \ref ArrayExtents.
template<typename ArrayExtents>
struct ArrayIndexRange
: private ArrayExtents
#if CAN_USE_RANGES
, std::ranges::view_base
#endif
{
static_assert(!std::is_const_v<ArrayExtents>);
constexpr ArrayIndexRange() noexcept = default;
LLAMA_FN_HOST_ACC_INLINE
constexpr explicit ArrayIndexRange(ArrayExtents extents) noexcept : ArrayExtents(extents)
{
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto begin() const noexcept -> ArrayIndexIterator<ArrayExtents>
{
return {*this, typename ArrayExtents::Index{}};
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto end() const noexcept -> ArrayIndexIterator<ArrayExtents>
{
auto endPos = typename ArrayExtents::Index{};
endPos[0] = this->toArray()[0];
return {*this, endPos};
}
};
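// Example usage of ArrayIndexRange (illustrative sketch, not part of the original header):
//
//     llama::ArrayExtents extents{2, 3};
//     for(auto ai : llama::ArrayIndexRange{extents})
//         std::cout << ai << '\n';  // visits {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {1,2}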
} // namespace llama
// ==
// == ./ArrayIndexRange.hpp ==
// ============================================================================
// #include "Core.hpp" // amalgamate: file already expanded
namespace llama
{
namespace internal
{
constexpr auto divRoundUp(std::size_t dividend, std::size_t divisor) -> std::size_t
{
return (dividend + divisor - 1) / divisor;
}
} // namespace internal
// FIXME: this test is actually not correct, because __cpp_constexpr_dynamic_alloc only guarantees constexpr
// std::allocator
#ifdef __cpp_constexpr_dynamic_alloc
namespace internal
{
template<typename T>
struct DynArray
{
constexpr DynArray() = default;
constexpr DynArray(std::size_t n)
{
data = new T[n]{};
}
constexpr ~DynArray()
{
delete[] data;
}
constexpr void resize(std::size_t n)
{
delete[] data;
data = new T[n]{};
}
T* data = nullptr;
};
} // namespace internal
/// Proves by exhaustion of the array and record dimensions that all values mapped to memory do not overlap.
// Unfortunately, this only works for smallish array dimensions, because of compiler limits on constexpr evaluation
// depth.
template<typename Mapping>
constexpr auto mapsNonOverlappingly(const Mapping& m) -> bool
{
internal::DynArray<internal::DynArray<std::uint64_t>> blobByteMapped(m.blobCount);
for(std::size_t i = 0; i < m.blobCount; i++)
blobByteMapped.data[i].resize(internal::divRoundUp(m.blobSize(i), 64));
auto testAndSet = [&](auto blob, auto offset) constexpr
{
const auto bit = std::uint64_t{1} << (offset % 64);
if(blobByteMapped.data[blob].data[offset / 64] & bit)
return true;
blobByteMapped.data[blob].data[offset / 64] |= bit;
return false;
};
bool collision = false;
forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) constexpr
{
if(collision)
return;
for(auto ai : ArrayIndexRange{m.extents()})
{
using Type
= GetType<typename Mapping::RecordDim, decltype(rc)>;
const auto [blob, offset] = m.blobNrAndOffset(ai, rc);
for(std::size_t b = 0; b < sizeof(Type); b++)
if(testAndSet(blob, offset + b))
{
collision = true;
break;
}
}
});
return !collision;
}
#endif
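// Example usage of mapsNonOverlappingly (illustrative sketch, not part of the original header;
// it assumes a mapping such as llama::mapping::AlignedAoS, which is defined further down in the
// full header, and the hypothetical Particle record dimension sketched above):
//
//     constexpr auto m = llama::mapping::AlignedAoS<llama::ArrayExtents<4, 4>, Particle>{};
//     static_assert(llama::mapsNonOverlappingly(m));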
/// Proves by exhaustion of the array and record dimensions that at least PieceLength elements are always stored
/// contiguously.
// Unfortunately, this only works for smallish array dimensions, because of compiler limits on constexpr evaluation
// depth.
template<std::size_t PieceLength, typename Mapping>
constexpr auto mapsPiecewiseContiguous(const Mapping& m) -> bool
{
bool collision = false;
forEachLeafCoord<typename Mapping::RecordDim>([&](auto rc) constexpr
{
std::size_t flatIndex = 0;
std::size_t lastBlob
= std::numeric_limits<std::size_t>::max();
std::size_t lastOffset
= std::numeric_limits<std::size_t>::max();
for(auto ai : ArrayIndexRange{m.extents()})
{
using Type
= GetType<typename Mapping::RecordDim, decltype(rc)>;
const auto [blob, offset] = m.blobNrAndOffset(ai, rc);
if(flatIndex % PieceLength != 0
&& (lastBlob != blob
|| lastOffset + sizeof(Type) != offset))
{
collision = true;
break;
}
lastBlob = blob;
lastOffset = offset;
flatIndex++;
}
});
return !collision;
}
} // namespace llama
// ==
// == ./Proofs.hpp ==
// ============================================================================
// ============================================================================
// == ./Vector.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// ============================================================================
// == ./View.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Array.hpp" // amalgamate: file already expanded
// #include "ArrayIndexRange.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./BlobAllocators.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Array.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./Concepts.hpp ==
// ==
// #pragma once
// #include "Array.hpp" // amalgamate: file already expanded
// #include "Core.hpp" // amalgamate: file already expanded
// #include <type_traits> // amalgamate: file already included
#if __has_include(<concepts>)
# include <concepts>
#endif
#ifdef __cpp_lib_concepts
namespace llama
{
// clang-format off
template <typename M>
concept Mapping = requires(M m) {
typename M::ArrayExtents;
typename M::ArrayIndex;
typename M::RecordDim;
{ m.extents() } -> std::same_as<typename M::ArrayExtents>;
{ M::blobCount } -> std::convertible_to<std::size_t>;
Array<int, M::blobCount>{}; // validates constexpr-ness
{ m.blobSize(std::size_t{}) } -> std::same_as<std::size_t>;
{ m.blobNrAndOffset(typename M::ArrayIndex{}) } -> std::same_as<NrAndOffset>;
{ m.template blobNrAndOffset<0>(typename M::ArrayIndex{}) } -> std::same_as<NrAndOffset>;
{ m.blobNrAndOffset(typename M::ArrayIndex{}, llama::RecordCoord<0>{}) } -> std::same_as<NrAndOffset>;
};
// clang-format on
template<typename B>
concept Blob = requires(B b, std::size_t i)
{
// according to http://eel.is/c++draft/intro.object#3 only std::byte and unsigned char can provide storage for
// other types
std::is_same_v<decltype(b[i]), std::byte&> || std::is_same_v<decltype(b[i]), unsigned char&>;
};
// clang-format off
template <typename BA>
concept BlobAllocator = requires(BA ba, std::integral_constant<std::size_t, 16> alignment, std::size_t size) {
{ ba(alignment, size) } -> Blob;
};
// clang-format on
} // namespace llama
#endif
// ==
// == ./Concepts.hpp ==
// ============================================================================
// #include "macros.hpp" // amalgamate: file already expanded
#include <cstddef>
#include <memory>
#include <vector>
#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 11000
# include <boost/shared_ptr.hpp>
#endif
namespace llama::bloballoc
{
/// Allocates stack memory for a \ref View, which is copied each time a \ref View is copied.
/// \tparam BytesToReserve the amount of memory to reserve.
template<std::size_t BytesToReserve>
struct Stack
{
template<std::size_t Alignment>
LLAMA_FN_HOST_ACC_INLINE auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t) const
{
struct alignas(Alignment) AlignedArray : Array<std::byte, BytesToReserve>
{
};
return AlignedArray{};
}
};
#ifdef __cpp_lib_concepts
static_assert(BlobAllocator<Stack<64>>);
#endif
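// Usage sketch (illustrative comment): Stack is normally passed as the allocator argument of llama::allocView
// (defined further below). For a constexpr mapping with a single blob this could look like:
//
//     constexpr auto mapping = llama::mapping::MinAlignedOne<llama::ArrayExtentsStatic<1, 1>, Particle>{};
//     auto view = llama::allocView(mapping, llama::bloballoc::Stack<mapping.blobSize(0)>{});
//
// Particle is a hypothetical record dimension; llama::allocViewStack below wraps exactly this pattern.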
/// Allocates heap memory managed by a `std::shared_ptr` for a \ref View. This memory is shared between all copies
/// of a \ref View.
struct SharedPtr
{
// libc++ below 11.0.0 does not yet support shared_ptr with arrays
template<typename T>
using shared_ptr =
#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 11000
boost::shared_ptr<T>;
#else
std::shared_ptr<T>;
#endif
template<std::size_t Alignment>
auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const
-> shared_ptr<std::byte[]>
{
auto* ptr
= static_cast<std::byte*>(::operator new[](count * sizeof(std::byte), std::align_val_t{Alignment}));
auto deleter = [=](std::byte* ptr) { ::operator delete[](ptr, std::align_val_t{Alignment}); };
return shared_ptr<std::byte[]>{ptr, deleter};
}
};
#ifdef __cpp_lib_concepts
static_assert(BlobAllocator<SharedPtr>);
#endif
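// Usage sketch (illustrative comment): SharedPtr is useful when several copies of a view should refer to the same
// memory, e.g. when a view is passed around by value:
//
//     auto view = llama::allocView(mapping, llama::bloballoc::SharedPtr{}); // mapping is a hypothetical mapping
//     auto alias = view; // copies only the shared_ptr blobs; both views address the same memory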
/// An STL-compatible allocator that allows specifying the alignment.
template<typename T, std::size_t Alignment>
struct AlignedAllocator
{
using value_type = T;
inline AlignedAllocator() noexcept = default;
template<typename T2>
inline explicit AlignedAllocator(AlignedAllocator<T2, Alignment> const&) noexcept
{
}
inline auto allocate(std::size_t n) -> T*
{
return static_cast<T*>(::operator new[](n * sizeof(T), std::align_val_t{Alignment}));
}
inline void deallocate(T* p, std::size_t)
{
::operator delete[](p, std::align_val_t{Alignment});
}
template<typename T2>
struct rebind // NOLINT(readability-identifier-naming)
{
using other = AlignedAllocator<T2, Alignment>;
};
auto operator!=(const AlignedAllocator<T, Alignment>& other) const -> bool
{
return !(*this == other);
}
auto operator==(const AlignedAllocator<T, Alignment>&) const -> bool
{
return true;
}
};
/// Allocates heap memory managed by a `std::vector` for a \ref View, which is copied each time a \ref View is
/// copied.
struct Vector
{
template<std::size_t Alignment>
inline auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const
{
return std::vector<std::byte, AlignedAllocator<std::byte, Alignment>>(count);
}
};
#ifdef __cpp_lib_concepts
static_assert(BlobAllocator<Vector>);
#endif
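// Usage sketch (illustrative comment): Vector is the default allocator of llama::allocView, so the following two
// calls are equivalent (mapping is a hypothetical mapping object):
//
//     auto a = llama::allocView(mapping);
//     auto b = llama::allocView(mapping, llama::bloballoc::Vector{});
//
// Each copy of such a view copies the underlying std::vector blobs.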
} // namespace llama::bloballoc
// ==
// == ./BlobAllocators.hpp ==
// ============================================================================
// #include "Concepts.hpp" // amalgamate: file already expanded
// #include "Core.hpp" // amalgamate: file already expanded
// #include "macros.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/One.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "../Core.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/Common.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "../Core.hpp" // amalgamate: file already expanded
#include <climits>
namespace llama::mapping
{
/// Functor that maps an \ref ArrayIndex into linear numbers the way C++ arrays work. The fast moving index of the
/// ArrayIndex object should be the last one. E.g. ArrayIndex<3> a; stores 3 indices where a[2] should be
/// incremented in the innermost loop.
struct LinearizeArrayDimsCpp
{
template<typename ArrayExtents>
LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) -> std::size_t
{
return product(extents);
}
/// \param ai Index in the array dimensions.
/// \param extents Total size of the array dimensions.
/// \return Linearized index.
template<typename ArrayExtents>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()(
const typename ArrayExtents::Index& ai,
const ArrayExtents& extents) const -> std::size_t
{
if constexpr(ArrayExtents::rank == 0)
return 0;
else
{
std::size_t address = ai[0];
for(std::size_t i = 1; i < ArrayExtents::rank; i++)
{
address *= extents[i];
address += ai[i];
}
return address;
}
}
};
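// Worked example (illustrative comment): for extents {3, 4} and index {i, j}, the functor computes i * 4 + j, so
// the last index moves fastest (row-major, like C++ arrays). E.g. index {1, 2} linearizes to 1 * 4 + 2 == 6.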
/// Functor that maps a \ref ArrayIndex into linear numbers the way Fortran arrays work. The fast moving index of
/// the ArrayIndex object should be the last one. E.g. ArrayIndex<3> a; stores 3 indices where a[2] should be
/// incremented in the innermost loop.
struct LinearizeArrayDimsFortran
{
template<typename ArrayExtents>
LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) -> std::size_t
{
return product(extents);
}
/// \param ai Index in the array dimensions.
/// \param extents Total size of the array dimensions.
/// \return Linearized index.
template<typename ArrayExtents>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()(
const typename ArrayExtents::Index& ai,
const ArrayExtents& extents) const -> std::size_t
{
if constexpr(ArrayExtents::rank == 0)
return 0;
else
{
std::size_t address = ai[ArrayExtents::rank - 1];
for(int i = static_cast<int>(ArrayExtents::rank) - 2; i >= 0; i--)
{
address *= extents[i];
address += ai[i];
}
return address;
}
}
};
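// Worked example (illustrative comment): for the same extents {3, 4} and index {i, j}, this functor computes
// j * 3 + i, so the first index moves fastest (column-major, like Fortran arrays). E.g. index {1, 2} linearizes to
// 2 * 3 + 1 == 7.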
/// Functor that maps an \ref ArrayIndex into linear numbers using the Z-order space filling curve (Morton codes).
struct LinearizeArrayDimsMorton
{
template<typename ArrayExtents>
LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) const -> std::size_t
{
if constexpr(ArrayExtents::rank == 0)
return 0;
else
{
std::size_t longest = extents[0];
for(std::size_t i = 1; i < ArrayExtents::rank; i++)
longest = std::max(longest, extents[i]);
const auto longestPO2 = bit_ceil(longest);
return intPow(longestPO2, ArrayExtents::rank);
}
}
/// \param ai Coordinate in the array dimensions.
/// \param extents Total size of the array dimensions.
/// \return Linearized index.
template<typename ArrayExtents>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()(
const typename ArrayExtents::Index& ai,
[[maybe_unused]] const ArrayExtents& extents) const -> std::size_t
{
std::size_t r = 0;
for(std::size_t bit = 0; bit < (sizeof(std::size_t) * CHAR_BIT) / ArrayExtents::rank; bit++)
for(std::size_t i = 0; i < ArrayExtents::rank; i++)
r |= (ai[i] & (std::size_t{1} << bit)) << ((bit + 1) * (ArrayExtents::rank - 1) - i);
return r;
}
private:
LLAMA_FN_HOST_ACC_INLINE static constexpr auto bit_ceil(std::size_t n) -> std::size_t
{
std::size_t r = 1;
while(r < n)
r <<= 1u;
return r;
}
LLAMA_FN_HOST_ACC_INLINE static constexpr auto intPow(std::size_t b, std::size_t e) -> std::size_t
{
e--;
auto r = b;
while(e != 0u)
{
r *= b;
e--;
}
return r;
}
};
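// Worked example (illustrative comment): in 2 dimensions the Morton code interleaves the bits of both indices,
// with ai[0] contributing the higher bit of each pair. E.g. ai = {2, 3} (binary 10 and 11) interleaves to binary
// 1101 == 13. The extents argument only influences size(), not the computed code.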
/// Flattens the record dimension in the order fields are written.
template<typename RecordDim>
struct FlattenRecordDimInOrder
{
using FlatRecordDim = llama::FlatRecordDim<RecordDim>;
template<std::size_t... RecordCoords>
static constexpr std::size_t flatIndex = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>;
};
/// Flattens the record dimension by sorting the fields according to a given predicate on the field types.
/// @tparam Less A binary predicate accepting two field types and exposing a member `value`, which must be true if
/// the first field type is less than the second one, and false otherwise.
template<typename RecordDim, template<typename, typename> typename Less>
struct FlattenRecordDimSorted
{
private:
using FlatOrigRecordDim = llama::FlatRecordDim<RecordDim>;
using FlatSortedRecordDim = boost::mp11::mp_sort<FlatOrigRecordDim, Less>;
template<typename A, typename B>
using LessWithIndices
= Less<boost::mp11::mp_at<FlatOrigRecordDim, A>, boost::mp11::mp_at<FlatOrigRecordDim, B>>;
// A permutation from new FlatSortedRecordDim index to old FlatOrigRecordDim index
using PermutedIndices
= boost::mp11::mp_sort<boost::mp11::mp_iota<boost::mp11::mp_size<FlatOrigRecordDim>>, LessWithIndices>;
template<typename A, typename B>
using LessInvertPermutation = std::bool_constant<(
boost::mp11::mp_at<PermutedIndices, A>::value < boost::mp11::mp_at<PermutedIndices, B>::value)>;
// A permutation from old FlatOrigRecordDim index to new FlatSortedRecordDim index
using InversePermutedIndices = boost::mp11::
mp_sort<boost::mp11::mp_iota<boost::mp11::mp_size<FlatOrigRecordDim>>, LessInvertPermutation>;
public:
using FlatRecordDim = FlatSortedRecordDim;
template<std::size_t... RecordCoords>
static constexpr std::size_t flatIndex = []() constexpr
{
constexpr auto indexBefore = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>;
constexpr auto indexAfter = boost::mp11::mp_at_c<InversePermutedIndices, indexBefore>::value;
return indexAfter;
}
();
};
namespace internal
{
template<typename A, typename B>
using LessAlignment = std::bool_constant<alignof(A) < alignof(B)>;
template<typename A, typename B>
using MoreAlignment = std::bool_constant<(alignof(A) > alignof(B))>;
} // namespace internal
/// Flattens and sorts the record dimension by increasing alignment of its fields.
template<typename RecordDim>
using FlattenRecordDimIncreasingAlignment = FlattenRecordDimSorted<RecordDim, internal::LessAlignment>;
/// Flattens and sorts the record dimension by decreasing alignment of its fields.
template<typename RecordDim>
using FlattenRecordDimDecreasingAlignment = FlattenRecordDimSorted<RecordDim, internal::MoreAlignment>;
/// Flattens and sorts the record dimension by the alignment of its fields to minimize padding.
template<typename RecordDim>
using FlattenRecordDimMinimizePadding = FlattenRecordDimIncreasingAlignment<RecordDim>;
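// Illustrative comment: for a hypothetical record dimension whose leaf field types are {float, double, float},
// FlattenRecordDimInOrder keeps that order (4 bytes of padding before the double when aligned), while
// FlattenRecordDimMinimizePadding / FlattenRecordDimIncreasingAlignment reorder it to {float, float, double} and
// FlattenRecordDimDecreasingAlignment to {double, float, float}, both of which need no padding between fields.
// The chosen order is exposed via the FlatRecordDim member type alias.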
} // namespace llama::mapping
// ==
// == ./mapping/Common.hpp ==
// ============================================================================
namespace llama::mapping
{
/// Maps all array dimension indices to the same location and lays out struct members consecutively. This mapping is
/// used for temporary, single element views.
/// \tparam AlignAndPad If true, padding bytes are inserted to guarantee that struct members are properly aligned.
/// If false, struct members are tightly packed.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
/// \ref FlattenRecordDimMinimizePadding.
template<
typename TArrayExtents,
typename TRecordDim,
bool AlignAndPad = true,
template<typename> typename FlattenRecordDim = FlattenRecordDimMinimizePadding>
struct One : TArrayExtents
{
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
static constexpr std::size_t blobCount = 1;
constexpr One() = default;
LLAMA_FN_HOST_ACC_INLINE
constexpr explicit One(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents)
{
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return ArrayExtents{*this};
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t) const -> std::size_t
{
return flatSizeOf<typename Flattener::FlatRecordDim, AlignAndPad, false>; // no tail padding
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex, RecordCoord<RecordCoords...> = {}) const
-> NrAndOffset
{
constexpr std::size_t flatFieldIndex =
#ifdef __NVCC__
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
constexpr auto offset = flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, AlignAndPad>;
return {0, offset};
}
private:
using Flattener = FlattenRecordDim<TRecordDim>;
};
/// One mapping preserving the alignment of the field types by inserting padding.
/// \see One
template<typename ArrayExtents, typename RecordDim>
using AlignedOne = One<ArrayExtents, RecordDim, true, FlattenRecordDimInOrder>;
/// One mapping preserving the alignment of the field types by inserting padding and permuting the field order to
/// minimize this padding.
/// \see One
template<typename ArrayExtents, typename RecordDim>
using MinAlignedOne = One<ArrayExtents, RecordDim, true, FlattenRecordDimMinimizePadding>;
/// One mapping packing the field types tightly, violating the types' alignment requirements.
/// \see One
template<typename ArrayExtents, typename RecordDim>
using PackedOne = One<ArrayExtents, RecordDim, false, FlattenRecordDimInOrder>;
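// Illustrative comment: for a hypothetical record dimension with the leaf field types {float, double, float},
// PackedOne needs 4 + 8 + 4 == 16 bytes per record, AlignedOne needs 20 bytes (4 bytes of padding before the
// double, no tail padding), and MinAlignedOne reorders the fields to {float, float, double} and needs 16 bytes
// without any padding. All three map every array index to the same single blob location.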
template<typename Mapping>
inline constexpr bool isOne = false;
template<typename ArrayExtents, typename RecordDim, bool AlignAndPad, template<typename> typename FlattenRecordDim>
inline constexpr bool isOne<One<ArrayExtents, RecordDim, AlignAndPad, FlattenRecordDim>> = true;
} // namespace llama::mapping
// ==
// == ./mapping/One.hpp ==
// ============================================================================
// #include <type_traits> // amalgamate: file already included
namespace llama
{
#ifdef __cpp_lib_concepts
template<typename TMapping, Blob BlobType>
#else
template<typename TMapping, typename BlobType>
#endif
struct View;
namespace internal
{
template<typename Allocator, typename RecordDim>
using AllocatorBlobType
= decltype(std::declval<Allocator>()(std::integral_constant<std::size_t, alignOf<RecordDim>>{}, 0));
LLAMA_SUPPRESS_HOST_DEVICE_WARNING
template<typename Allocator, typename Mapping, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE auto makeBlobArray(
const Allocator& alloc,
const Mapping& mapping,
std::integer_sequence<std::size_t, Is...>)
-> Array<AllocatorBlobType<Allocator, typename Mapping::RecordDim>, Mapping::blobCount>
{
[[maybe_unused]] constexpr auto alignment
= alignOf<typename Mapping::RecordDim>; // g++-12 warns that alignment is unused
return {alloc(std::integral_constant<std::size_t, alignment>{}, mapping.blobSize(Is))...};
}
} // namespace internal
/// Same as \ref allocView but does not run field constructors.
#ifdef __cpp_lib_concepts
template<typename Mapping, BlobAllocator Allocator = bloballoc::Vector>
#else
template<typename Mapping, typename Allocator = bloballoc::Vector>
#endif
LLAMA_FN_HOST_ACC_INLINE auto allocViewUninitialized(Mapping mapping = {}, const Allocator& alloc = {})
-> View<Mapping, internal::AllocatorBlobType<Allocator, typename Mapping::RecordDim>>
{
auto blobs = internal::makeBlobArray(alloc, mapping, std::make_index_sequence<Mapping::blobCount>{});
return {std::move(mapping), std::move(blobs)};
}
namespace internal
{
template<typename Mapping, typename RecordCoord, typename = void>
struct IsComputed : std::false_type
{
};
template<typename Mapping, typename RecordCoord>
struct IsComputed<Mapping, RecordCoord, std::void_t<decltype(Mapping::isComputed(RecordCoord{}))>>
: std::bool_constant<Mapping::isComputed(RecordCoord{})>
{
};
} // namespace internal
/// Returns true if the field accessed via the given mapping and record coordinate is a computed value.
template<typename Mapping, typename RecordCoord>
inline constexpr bool isComputed = internal::IsComputed<Mapping, RecordCoord>::value;
/// Runs the constructors of all fields reachable through the given view. Computed fields are not constructed.
template<typename Mapping, typename BlobType>
LLAMA_FN_HOST_ACC_INLINE void constructFields(View<Mapping, BlobType>& view)
{
using View = View<Mapping, BlobType>;
using RecordDim = typename View::RecordDim;
forEachADCoord(
view.mapping().extents(),
[&](typename View::ArrayIndex ai)
{
if constexpr(isRecord<RecordDim> || internal::IsBoundedArray<RecordDim>::value)
forEachLeafCoord<RecordDim>(
[&](auto rc)
{
// TODO(bgruber): we could initialize computed fields if we can write to those. We could
// test if the returned value can be cast to a T& and then attempt to write.
if constexpr(!isComputed<Mapping, decltype(rc)>)
new(&view(ai)(rc)) GetType<RecordDim, decltype(rc)>;
});
else if constexpr(!isComputed<Mapping, RecordCoord<>>)
new(&view(ai)) RecordDim;
});
}
/// Creates a view based on the given mapping, e.g. \ref AoS or \ref SoA. For allocating the view's underlying
/// memory, the specified allocator callable is used (or the default one, which is \ref bloballoc::Vector). The
/// allocator callable is called with the alignment and size of bytes to allocate for each blob of the mapping.
/// The constructors are run for all fields by calling \ref constructFields. This function is the preferred way to
/// create a \ref View. See also \ref allocViewUninitialized.
#ifdef __cpp_lib_concepts
template<typename Mapping, BlobAllocator Allocator = bloballoc::Vector>
#else
template<typename Mapping, typename Allocator = bloballoc::Vector>
#endif
LLAMA_FN_HOST_ACC_INLINE auto allocView(Mapping mapping = {}, const Allocator& alloc = {})
-> View<Mapping, internal::AllocatorBlobType<Allocator, typename Mapping::RecordDim>>
{
auto view = allocViewUninitialized(std::move(mapping), alloc);
constructFields(view);
return view;
}
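// Usage sketch (illustrative comment; the tag, record dimension and extents type names are assumptions, \ref AoS
// is the mapping referenced above as an example):
//
//     struct X{}; struct Y{};
//     using Particle = llama::Record<llama::Field<X, float>, llama::Field<Y, float>>;
//     auto view = llama::allocView(llama::mapping::AoS<llama::ArrayExtentsDynamic<1>, Particle>{{1000}});
//     view(42)(X{}) = 1.0f; // element 42, field X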
/// Allocates a \ref View holding a single record backed by stack memory (\ref bloballoc::Stack).
/// \tparam Dim Dimension of the \ref ArrayExtents of the \ref View.
template<std::size_t Dim, typename RecordDim>
LLAMA_FN_HOST_ACC_INLINE auto allocViewStack() -> decltype(auto)
{
constexpr auto mapping = mapping::MinAlignedOne<ArrayExtentsStatic<Dim, 1>, RecordDim>{};
return allocView(mapping, bloballoc::Stack<mapping.blobSize(0)>{});
}
template<typename View, typename BoundRecordCoord = RecordCoord<>, bool OwnView = false>
struct VirtualRecord;
/// A \ref VirtualRecord that owns and holds a single value.
template<typename RecordDim>
using One = VirtualRecord<decltype(allocViewStack<0, RecordDim>()), RecordCoord<>, true>;
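// Usage sketch (illustrative comment): llama::One is handy as a local temporary holding a single record, e.g. with
// the hypothetical Particle/X/Y names from the sketch above:
//
//     llama::One<Particle> p;
//     p(X{}) = 1.0f;
//     p(Y{}) = 2.0f;
//     view(0) = p; // copies all fields into element 0 of a view with a matching record dimension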
// TODO(bgruber): Higher dimensional iterators might not have good codegen. Multiple nested loops seem to be
// superior to a single iterator over multiple dimensions; at least compilers are able to produce better code for them.
// std::mdspan also discovered similar difficulties and there was a discussion in WG21 in Oulu 2016 to
// remove/postpone iterators from the design. In std::mdspan's design, the iterator iterated over the co-domain.
template<typename View>
struct Iterator
{
using ArrayIndexIterator = llama::ArrayIndexIterator<typename View::ArrayExtents>;
using iterator_category = std::random_access_iterator_tag;
using value_type = One<typename View::RecordDim>;
using difference_type = typename ArrayIndexIterator::difference_type;
using pointer = internal::IndirectValue<VirtualRecord<View>>;
using reference = VirtualRecord<View>;
constexpr Iterator() = default;
LLAMA_FN_HOST_ACC_INLINE constexpr Iterator(ArrayIndexIterator arrayIndex, View* view)
: arrayIndex(arrayIndex)
, view(view)
{
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator++() -> Iterator&
{
++arrayIndex;
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator++(int) -> Iterator
{
auto tmp = *this;
++*this;
return tmp;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator--() -> Iterator&
{
--arrayIndex;
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator--(int) -> Iterator
{
auto tmp{*this};
--*this;
return tmp;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator*() const -> reference
{
return (*view)(*arrayIndex);
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator->() const -> pointer
{
return {**this};
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator[](difference_type i) const -> reference
{
return *(*this + i);
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator+=(difference_type n) -> Iterator&
{
arrayIndex += n;
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator+(Iterator it, difference_type n) -> Iterator
{
it += n;
return it;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator+(difference_type n, Iterator it) -> Iterator
{
return it + n;
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto operator-=(difference_type n) -> Iterator&
{
arrayIndex -= n;
return *this;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator-(Iterator it, difference_type n) -> Iterator
{
it -= n;
return it;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator-(const Iterator& a, const Iterator& b) -> difference_type
{
assert(a.view == b.view);
return static_cast<std::ptrdiff_t>(a.arrayIndex - b.arrayIndex);
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator==(const Iterator& a, const Iterator& b) -> bool
{
assert(a.view == b.view);
return a.arrayIndex == b.arrayIndex;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator!=(const Iterator& a, const Iterator& b) -> bool
{
return !(a == b);
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator<(const Iterator& a, const Iterator& b) -> bool
{
assert(a.view == b.view);
return a.arrayIndex < b.arrayIndex;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator>(const Iterator& a, const Iterator& b) -> bool
{
return b < a;
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator<=(const Iterator& a, const Iterator& b) -> bool
{
return !(a > b);
}
LLAMA_FN_HOST_ACC_INLINE
friend constexpr auto operator>=(const Iterator& a, const Iterator& b) -> bool
{
return !(a < b);
}
ArrayIndexIterator arrayIndex;
View* view;
};
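// Usage sketch (illustrative comment): the iterator enables range-based for loops over a view, iterating the full
// array extents and yielding a VirtualRecord per element (names reuse the hypothetical sketch above):
//
//     for(auto vr : view)
//         vr(X{}) += 1.0f;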
/// Central LLAMA class. It holds the memory for storage and provides access to values stored there, as defined by
/// a mapping. A view should be created using \ref allocView.
/// \tparam TMapping The mapping used by the view to map accesses into memory.
/// \tparam BlobType The storage type used by the view holding memory.
#ifdef __cpp_lib_concepts
template<typename TMapping, Blob BlobType>
#else
template<typename TMapping, typename BlobType>
#endif
struct View
: private TMapping
#if CAN_USE_RANGES
, std::ranges::view_base
#endif
{
static_assert(!std::is_const_v<TMapping>);
using Mapping = TMapping;
using ArrayExtents = typename Mapping::ArrayExtents;
using ArrayIndex = typename Mapping::ArrayIndex;
using RecordDim = typename Mapping::RecordDim;
using iterator = Iterator<View>;
using const_iterator = Iterator<const View>;
static_assert(
std::is_same_v<Mapping, std::decay_t<Mapping>>,
"Mapping must not be const qualified or a reference. Are you using decltype(...) as View template "
"argument?");
static_assert(
std::is_same_v<ArrayExtents, std::decay_t<ArrayExtents>>,
"Mapping::ArrayExtents must not be const qualified or a reference. Are you using decltype(...) as mapping "
"template argument?");
View() = default;
LLAMA_FN_HOST_ACC_INLINE
View(Mapping mapping, Array<BlobType, Mapping::blobCount> storageBlobs)
: Mapping(std::move(mapping))
, storageBlobs(std::move(storageBlobs))
{
}
LLAMA_FN_HOST_ACC_INLINE auto mapping() -> Mapping&
{
return static_cast<Mapping&>(*this);
}
LLAMA_FN_HOST_ACC_INLINE auto mapping() const -> const Mapping&
{
return static_cast<const Mapping&>(*this);
}
/// Retrieves the \ref VirtualRecord at the given \ref ArrayIndex index.
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) const -> decltype(auto)
{
if constexpr(isRecord<RecordDim> || internal::IsBoundedArray<RecordDim>::value)
{
LLAMA_FORCE_INLINE_RECURSIVE
return VirtualRecord<const View>{ai, *this};
}
else
{
LLAMA_FORCE_INLINE_RECURSIVE
return accessor(ai, RecordCoord<>{});
}
}
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) -> decltype(auto)
{
if constexpr(isRecord<RecordDim> || internal::IsBoundedArray<RecordDim>::value)
{
LLAMA_FORCE_INLINE_RECURSIVE
return VirtualRecord<View>{ai, *this};
}
else
{
LLAMA_FORCE_INLINE_RECURSIVE
return accessor(ai, RecordCoord<>{});
}
}
/// Retrieves the \ref VirtualRecord at the \ref ArrayIndex index constructed from the passed component
/// indices.
template<typename... Indices>
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) const -> decltype(auto)
{
static_assert(
sizeof...(Indices) == ArrayIndex::rank,
"Please specify as many indices as you have array dimensions");
static_assert(
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>,
"Indices must be convertible to std::size_t");
LLAMA_FORCE_INLINE_RECURSIVE
return (*this)(ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...});
}
template<typename... Indices>
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) -> decltype(auto)
{
static_assert(
sizeof...(Indices) == ArrayIndex::rank,
"Please specify as many indices as you have array dimensions");
static_assert(
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>,
"Indices must be convertible to std::size_t");
LLAMA_FORCE_INLINE_RECURSIVE
return (*this)(ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...});
}
/// Retrieves the \ref VirtualRecord at the \ref ArrayIndex index constructed from the passed component
/// indices.
LLAMA_FN_HOST_ACC_INLINE auto operator[](ArrayIndex ai) const -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return (*this)(ai);
}
LLAMA_FN_HOST_ACC_INLINE auto operator[](ArrayIndex ai) -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return (*this)(ai);
}
/// Retrieves the \ref VirtualRecord at the 1D \ref ArrayIndex index constructed from the passed index.
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t index) const -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return (*this)(index);
}
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t index) -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return (*this)(index);
}
LLAMA_FN_HOST_ACC_INLINE
auto begin() -> iterator
{
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
}
LLAMA_FN_HOST_ACC_INLINE
auto begin() const -> const_iterator
{
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.begin(), this};
}
LLAMA_FN_HOST_ACC_INLINE
auto end() -> iterator
{
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
}
LLAMA_FN_HOST_ACC_INLINE
auto end() const -> const_iterator
{
return {ArrayIndexRange<ArrayExtents>{mapping().extents()}.end(), this};
}
Array<BlobType, Mapping::blobCount> storageBlobs;
private:
template<typename TView, typename TBoundRecordCoord, bool OwnView>
friend struct VirtualRecord;
LLAMA_SUPPRESS_HOST_DEVICE_WARNING
template<std::size_t... Coords>
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai, RecordCoord<Coords...> rc = {}) const -> decltype(auto)
{
if constexpr(llama::isComputed<Mapping, RecordCoord<Coords...>>)
return mapping().compute(ai, rc, storageBlobs);
else
{
const auto [nr, offset] = mapping().blobNrAndOffset(ai, rc);
using Type = GetType<RecordDim, RecordCoord<Coords...>>;
return reinterpret_cast<const Type&>(storageBlobs[nr][offset]);
}
}
LLAMA_SUPPRESS_HOST_DEVICE_WARNING
template<std::size_t... Coords>
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai, RecordCoord<Coords...> rc = {}) -> decltype(auto)
{
if constexpr(llama::isComputed<Mapping, RecordCoord<Coords...>>)
return mapping().compute(ai, rc, storageBlobs);
else
{
const auto [nr, offset] = mapping().blobNrAndOffset(ai, rc);
using Type = GetType<RecordDim, RecordCoord<Coords...>>;
using QualifiedType = std::conditional_t<
std::is_const_v<std::remove_reference_t<decltype(storageBlobs[nr][offset])>>,
const Type,
Type>;
return reinterpret_cast<QualifiedType&>(storageBlobs[nr][offset]);
}
}
};
template<typename View>
inline constexpr auto IsView = false;
template<typename Mapping, typename BlobType>
inline constexpr auto IsView<View<Mapping, BlobType>> = true;
/// Acts like a \ref View, but shows only a smaller and/or shifted part of another view it references, the parent
/// view.
template<typename TParentView>
struct VirtualView
{
using ParentView = TParentView; ///< type of the parent view
using Mapping = typename ParentView::Mapping; ///< mapping of the parent view
using ArrayExtents = typename Mapping::ArrayExtents; ///< array extents of the parent view
using ArrayIndex = typename Mapping::ArrayIndex; ///< array index of the parent view
/// Creates a VirtualView given a parent \ref View and offset.
LLAMA_FN_HOST_ACC_INLINE
VirtualView(ParentView& parentView, ArrayIndex offset) : parentView(parentView), offset(offset)
{
}
template<std::size_t... Coords>
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai) const -> const auto&
{
return parentView.template accessor<Coords...>(ArrayIndex{ai + offset});
}
template<std::size_t... Coords>
LLAMA_FN_HOST_ACC_INLINE auto accessor(ArrayIndex ai) -> auto&
{
return parentView.template accessor<Coords...>(ArrayIndex{ai + offset});
}
/// Same as \ref View::operator()(ArrayIndex), but shifted by the offset of this \ref VirtualView.
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) const -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return parentView(ArrayIndex{ai + offset});
}
LLAMA_FN_HOST_ACC_INLINE auto operator()(ArrayIndex ai) -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return parentView(ArrayIndex{ai + offset});
}
/// Same as corresponding operator in \ref View, but shifted by the offset of this \ref VirtualView.
template<typename... Indices>
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) const -> decltype(auto)
{
static_assert(
sizeof...(Indices) == ArrayIndex::rank,
"Please specify as many indices as you have array dimensions");
static_assert(
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>,
"Indices must be convertible to std::size_t");
LLAMA_FORCE_INLINE_RECURSIVE
return parentView(
ArrayIndex{ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...} + offset});
}
template<typename... Indices>
LLAMA_FN_HOST_ACC_INLINE auto operator()(Indices... indices) -> decltype(auto)
{
static_assert(
sizeof...(Indices) == ArrayIndex::rank,
"Please specify as many indices as you have array dimensions");
static_assert(
std::conjunction_v<std::is_convertible<Indices, std::size_t>...>,
"Indices must be convertible to std::size_t");
LLAMA_FORCE_INLINE_RECURSIVE
return parentView(
ArrayIndex{ArrayIndex{static_cast<typename ArrayIndex::value_type>(indices)...} + offset});
}
template<std::size_t... Coord>
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) const -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return accessor<Coord...>(ArrayIndex{});
}
template<std::size_t... Coord>
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) -> decltype(auto)
{
LLAMA_FORCE_INLINE_RECURSIVE
return accessor<Coord...>(ArrayIndex{});
}
ParentView& parentView; ///< reference to parent view.
const ArrayIndex
offset; ///< offset by which this view's \ref ArrayIndex indices are shifted when passed to the parent view.
};
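// Usage sketch (illustrative comment): a VirtualView exposes a shifted window into an existing view, e.g. with the
// hypothetical 1D view from the sketches above:
//
//     auto sub = llama::VirtualView{view, typename decltype(view)::ArrayIndex{10}};
//     sub(5)(X{}) = 1.0f; // resolves to element 10 + 5 == 15 of the parent view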
} // namespace llama
// ==
// == ./View.hpp ==
// ============================================================================
// ============================================================================
// == ./VirtualRecord.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "View.hpp" // amalgamate: file already expanded
#include <iosfwd>
// #include <type_traits> // amalgamate: file already included
namespace llama
{
template<typename View, typename BoundRecordCoord, bool OwnView>
struct VirtualRecord;
template<typename View>
inline constexpr auto is_VirtualRecord = false;
template<typename View, typename BoundRecordCoord, bool OwnView>
inline constexpr auto is_VirtualRecord<VirtualRecord<View, BoundRecordCoord, OwnView>> = true;
/// Creates a single \ref VirtualRecord owning a view with stack memory and copies all values from an existing \ref
/// VirtualRecord.
template<typename VirtualRecord>
LLAMA_FN_HOST_ACC_INLINE auto copyVirtualRecordStack(const VirtualRecord& vd) -> decltype(auto)
{
One<typename VirtualRecord::AccessibleRecordDim> temp;
temp = vd;
return temp;
}
namespace internal
{
template<
typename Functor,
typename LeftRecord,
typename RightView,
typename RightBoundRecordDim,
bool RightOwnView>
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordArithOperator(
LeftRecord& left,
const VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>& right) -> LeftRecord&
{
using RightRecord = VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>;
// if the left and right record dimensions are the same, a single loop is enough and no tag check is needed.
// This saves a lot of compilation time.
if constexpr(std::is_same_v<
typename LeftRecord::AccessibleRecordDim,
typename RightRecord::AccessibleRecordDim>)
{
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>([&](auto rc) LLAMA_LAMBDA_INLINE
{ Functor{}(left(rc), right(rc)); });
}
else
{
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>(
[&](auto leftRC) LLAMA_LAMBDA_INLINE
{
using LeftInnerCoord = decltype(leftRC);
forEachLeafCoord<typename RightRecord::AccessibleRecordDim>(
[&](auto rightRC) LLAMA_LAMBDA_INLINE
{
using RightInnerCoord = decltype(rightRC);
if constexpr(hasSameTags<
typename LeftRecord::AccessibleRecordDim,
LeftInnerCoord,
typename RightRecord::AccessibleRecordDim,
RightInnerCoord>)
{
Functor{}(left(leftRC), right(rightRC));
}
});
});
}
return left;
}
template<typename Functor, typename LeftRecord, typename T>
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordArithOperator(LeftRecord& left, const T& right) -> LeftRecord&
{
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>([&](auto leftRC) LLAMA_LAMBDA_INLINE
{ Functor{}(left(leftRC), right); });
return left;
}
template<
typename Functor,
typename LeftRecord,
typename RightView,
typename RightBoundRecordDim,
bool RightOwnView>
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordRelOperator(
const LeftRecord& left,
const VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>& right) -> bool
{
using RightRecord = VirtualRecord<RightView, RightBoundRecordDim, RightOwnView>;
bool result = true;
// if the left and right record dimensions are the same, a single loop is enough and no tag check is needed.
// This saves a lot of compilation time.
if constexpr(std::is_same_v<
typename LeftRecord::AccessibleRecordDim,
typename RightRecord::AccessibleRecordDim>)
{
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE { result &= Functor{}(left(rc), right(rc)); });
}
else
{
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>(
[&](auto leftRC) LLAMA_LAMBDA_INLINE
{
using LeftInnerCoord = decltype(leftRC);
forEachLeafCoord<typename RightRecord::AccessibleRecordDim>(
[&](auto rightRC) LLAMA_LAMBDA_INLINE
{
using RightInnerCoord = decltype(rightRC);
if constexpr(hasSameTags<
typename LeftRecord::AccessibleRecordDim,
LeftInnerCoord,
typename RightRecord::AccessibleRecordDim,
RightInnerCoord>)
{
result &= Functor{}(left(leftRC), right(rightRC));
}
});
});
}
return result;
}
template<typename Functor, typename LeftRecord, typename T>
LLAMA_FN_HOST_ACC_INLINE auto virtualRecordRelOperator(const LeftRecord& left, const T& right) -> bool
{
bool result = true;
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>(
[&](auto leftRC) LLAMA_LAMBDA_INLINE {
result &= Functor{}(
left(leftRC),
static_cast<std::remove_reference_t<decltype(left(leftRC))>>(right));
});
return result;
}
struct Assign
{
template<typename A, typename B>
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
{
return std::forward<A>(a) = b;
}
};
struct PlusAssign
{
template<typename A, typename B>
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
{
return std::forward<A>(a) += b;
}
};
struct MinusAssign
{
template<typename A, typename B>
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
{
return std::forward<A>(a) -= b;
}
};
struct MultiplyAssign
{
template<typename A, typename B>
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
{
return std::forward<A>(a) *= b;
}
};
struct DivideAssign
{
template<typename A, typename B>
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
{
return std::forward<A>(a) /= b;
}
};
struct ModuloAssign
{
template<typename A, typename B>
LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
{
return std::forward<A>(a) %= b;
}
};
template<typename TWithOptionalConst, typename T>
LLAMA_FN_HOST_ACC_INLINE auto asTupleImpl(TWithOptionalConst& leaf, T) -> std::enable_if_t<
!is_VirtualRecord<std::decay_t<TWithOptionalConst>>,
std::reference_wrapper<TWithOptionalConst>>
{
return leaf;
}
template<typename VirtualRecord, typename T, std::size_t N, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE auto asTupleImplArr(VirtualRecord&& vd, T(&&)[N], std::index_sequence<Is...>)
{
return std::make_tuple(asTupleImpl(vd(RecordCoord<Is>{}), T{})...);
}
template<typename VirtualRecord, typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE auto asTupleImpl(VirtualRecord&& vd, T(&&a)[N])
{
return asTupleImplArr(std::forward<VirtualRecord>(vd), std::move(a), std::make_index_sequence<N>{});
}
template<typename VirtualRecord, typename... Fields>
LLAMA_FN_HOST_ACC_INLINE auto asTupleImpl(VirtualRecord&& vd, Record<Fields...>)
{
return std::make_tuple(asTupleImpl(vd(GetFieldTag<Fields>{}), GetFieldType<Fields>{})...);
}
template<typename TWithOptionalConst, typename T>
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImpl(TWithOptionalConst& leaf, T)
-> std::enable_if_t<!is_VirtualRecord<std::decay_t<TWithOptionalConst>>, std::tuple<TWithOptionalConst&>>
{
return {leaf};
}
template<typename VirtualRecord, typename T, std::size_t N, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImplArr(VirtualRecord&& vd, T(&&)[N], std::index_sequence<Is...>)
{
return std::tuple_cat(asFlatTupleImpl(vd(RecordCoord<Is>{}), T{})...);
}
template<typename VirtualRecord, typename T, std::size_t N>
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImpl(VirtualRecord&& vd, T(&&a)[N])
{
return asFlatTupleImplArr(std::forward<VirtualRecord>(vd), std::move(a), std::make_index_sequence<N>{});
}
template<typename VirtualRecord, typename... Fields>
LLAMA_FN_HOST_ACC_INLINE auto asFlatTupleImpl(VirtualRecord&& vd, Record<Fields...>)
{
return std::tuple_cat(asFlatTupleImpl(vd(GetFieldTag<Fields>{}), GetFieldType<Fields>{})...);
}
template<typename T, typename = void>
constexpr inline auto isTupleLike = false;
// get<I>(t) and std::tuple_size<T> must be available
using std::get; // make sure a get<0>() can be found, so the compiler can compile the trait
template<typename T>
constexpr inline auto
isTupleLike<T, std::void_t<decltype(get<0>(std::declval<T>())), std::tuple_size<T>>> = true;
template<typename... Ts>
constexpr inline auto dependentFalse = false;
template<typename Tuple1, typename Tuple2, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE void assignTuples(Tuple1&& dst, Tuple2&& src, std::index_sequence<Is...>);
template<typename T1, typename T2>
LLAMA_FN_HOST_ACC_INLINE void assignTupleElement(T1&& dst, T2&& src)
{
if constexpr(isTupleLike<std::decay_t<T1>> && isTupleLike<std::decay_t<T2>>)
{
static_assert(std::tuple_size_v<std::decay_t<T1>> == std::tuple_size_v<std::decay_t<T2>>);
assignTuples(dst, src, std::make_index_sequence<std::tuple_size_v<std::decay_t<T1>>>{});
}
else if constexpr(!isTupleLike<std::decay_t<T1>> && !isTupleLike<std::decay_t<T2>>)
std::forward<T1>(dst) = std::forward<T2>(src);
else
static_assert(
dependentFalse<T1, T2>,
"Elements to assign are not tuple/tuple or non-tuple/non-tuple.");
}
template<typename Tuple1, typename Tuple2, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE void assignTuples(Tuple1&& dst, Tuple2&& src, std::index_sequence<Is...>)
{
static_assert(std::tuple_size_v<std::decay_t<Tuple1>> == std::tuple_size_v<std::decay_t<Tuple2>>);
using std::get;
(assignTupleElement(get<Is>(std::forward<Tuple1>(dst)), get<Is>(std::forward<Tuple2>(src))), ...);
}
template<typename T, typename Tuple, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE auto makeFromTuple(Tuple&& src, std::index_sequence<Is...>)
{
using std::get;
return T{get<Is>(std::forward<Tuple>(src))...};
}
template<typename T, typename SFINAE, typename... Args>
constexpr inline auto isDirectListInitializableImpl = false;
template<typename T, typename... Args>
constexpr inline auto
isDirectListInitializableImpl<T, std::void_t<decltype(T{std::declval<Args>()...})>, Args...> = true;
template<typename T, typename... Args>
constexpr inline auto isDirectListInitializable = isDirectListInitializableImpl<T, void, Args...>;
template<typename T, typename Tuple>
constexpr inline auto isDirectListInitializableFromTuple = false;
template<typename T, template<typename...> typename Tuple, typename... Args>
constexpr inline auto
isDirectListInitializableFromTuple<T, Tuple<Args...>> = isDirectListInitializable<T, Args...>;
} // namespace internal
/// Virtual record type returned by \ref View after resolving an array dimensions coordinate or partially resolving
/// a \ref RecordCoord. A virtual record does not hold data itself (thus named "virtual"); it just binds enough
/// information (array dimensions coord and partial record coord) to retrieve it from a \ref View later. Virtual
/// records should not be created by the user. They are returned from various access functions in \ref View and
/// VirtualRecord itself.
template<typename TView, typename TBoundRecordCoord, bool OwnView>
struct VirtualRecord : private TView::Mapping::ArrayIndex
{
using View = TView; ///< View this virtual record points into.
using BoundRecordCoord
= TBoundRecordCoord; ///< Record coords into View::RecordDim which are already bound by this VirtualRecord.
private:
using ArrayIndex = typename View::Mapping::ArrayIndex;
using RecordDim = typename View::Mapping::RecordDim;
std::conditional_t<OwnView, View, View&> view;
public:
/// Subtree of the record dimension of View starting at BoundRecordCoord. If BoundRecordCoord is
/// `RecordCoord<>` (default) AccessibleRecordDim is the same as `Mapping::RecordDim`.
using AccessibleRecordDim = GetType<RecordDim, BoundRecordCoord>;
/// Creates an empty VirtualRecord. Only available if the view is owned. Used by llama::One.
LLAMA_FN_HOST_ACC_INLINE VirtualRecord()
/* requires(OwnView) */
: ArrayIndex{}
, view{allocViewStack<0, RecordDim>()}
{
static_assert(OwnView, "The default constructor of VirtualRecord is only available if it owns the view.");
}
LLAMA_FN_HOST_ACC_INLINE
VirtualRecord(ArrayIndex ai, std::conditional_t<OwnView, View&&, View&> view)
: ArrayIndex{ai}
, view{static_cast<decltype(view)>(view)}
{
}
VirtualRecord(const VirtualRecord&) = default;
// NOLINTNEXTLINE(cert-oop54-cpp)
LLAMA_FN_HOST_ACC_INLINE auto operator=(const VirtualRecord& other) -> VirtualRecord&
{
// NOLINTNEXTLINE(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator)
return this->operator=<VirtualRecord>(other);
}
VirtualRecord(VirtualRecord&&) noexcept = default;
auto operator=(VirtualRecord&&) noexcept -> VirtualRecord& = default;
~VirtualRecord() = default;
LLAMA_FN_HOST_ACC_INLINE constexpr auto arrayIndex() const -> ArrayIndex
{
return *this;
}
/// Creates a VirtualRecord from a different VirtualRecord. Only available if the view is owned. Used by
/// llama::One.
template<typename OtherView, typename OtherBoundRecordCoord, bool OtherOwnView>
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
LLAMA_FN_HOST_ACC_INLINE VirtualRecord(
const VirtualRecord<OtherView, OtherBoundRecordCoord, OtherOwnView>& virtualRecord)
/* requires(OwnView) */
: VirtualRecord()
{
static_assert(
OwnView,
"The copy constructor of VirtualRecord from a different VirtualRecord is only available if it owns "
"the "
"view.");
*this = virtualRecord;
}
// TODO(bgruber): unify with previous in C++20 and use explicit(cond)
/// Creates a VirtualRecord from a scalar. Only available if the view is owned. Used by llama::One.
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE explicit VirtualRecord(const T& scalar)
/* requires(OwnView) */
: VirtualRecord()
{
static_assert(
OwnView,
"The constructor of VirtualRecord from a scalar is only available if it owns the view.");
*this = scalar;
}
/// Access a record in the record dimension underneath the current virtual record using a \ref RecordCoord. If
/// the access resolves to a leaf, a reference to a variable inside the \ref View storage is returned,
/// otherwise another virtual record.
template<std::size_t... Coord>
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) const -> decltype(auto)
{
using AbsolutCoord = Cat<BoundRecordCoord, RecordCoord<Coord...>>;
using AccessedType = GetType<RecordDim, AbsolutCoord>;
if constexpr(isRecord<AccessedType> || internal::IsBoundedArray<AccessedType>::value)
{
LLAMA_FORCE_INLINE_RECURSIVE
return VirtualRecord<const View, AbsolutCoord>{arrayIndex(), this->view};
}
else
{
LLAMA_FORCE_INLINE_RECURSIVE
return this->view.accessor(arrayIndex(), AbsolutCoord{});
}
}
// FIXME(bgruber): remove redundancy
template<std::size_t... Coord>
LLAMA_FN_HOST_ACC_INLINE auto operator()(RecordCoord<Coord...> = {}) -> decltype(auto)
{
using AbsolutCoord = Cat<BoundRecordCoord, RecordCoord<Coord...>>;
using AccessedType = GetType<RecordDim, AbsolutCoord>;
if constexpr(isRecord<AccessedType> || internal::IsBoundedArray<AccessedType>::value)
{
LLAMA_FORCE_INLINE_RECURSIVE
return VirtualRecord<View, AbsolutCoord>{arrayIndex(), this->view};
}
else
{
LLAMA_FORCE_INLINE_RECURSIVE
return this->view.accessor(arrayIndex(), AbsolutCoord{});
}
}
/// Access a record in the record dimension underneath the current virtual record using a series of tags. If
/// the access resolves to a leaf, a reference to a variable inside the \ref View storage is returned,
/// otherwise another virtual record.
template<typename... Tags>
LLAMA_FN_HOST_ACC_INLINE auto operator()(Tags...) const -> decltype(auto)
{
using RecordCoord = GetCoordFromTags<AccessibleRecordDim, Tags...>;
LLAMA_FORCE_INLINE_RECURSIVE
return operator()(RecordCoord{});
}
// FIXME(bgruber): remove redundancy
template<typename... Tags>
LLAMA_FN_HOST_ACC_INLINE auto operator()(Tags...) -> decltype(auto)
{
using RecordCoord = GetCoordFromTags<AccessibleRecordDim, Tags...>;
LLAMA_FORCE_INLINE_RECURSIVE
return operator()(RecordCoord{});
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto operator=(const T& other) -> VirtualRecord&
{
// NOLINTNEXTLINE(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator)
return internal::virtualRecordArithOperator<internal::Assign>(*this, other);
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto operator+=(const T& other) -> VirtualRecord&
{
return internal::virtualRecordArithOperator<internal::PlusAssign>(*this, other);
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto operator-=(const T& other) -> VirtualRecord&
{
return internal::virtualRecordArithOperator<internal::MinusAssign>(*this, other);
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto operator*=(const T& other) -> VirtualRecord&
{
return internal::virtualRecordArithOperator<internal::MultiplyAssign>(*this, other);
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto operator/=(const T& other) -> VirtualRecord&
{
return internal::virtualRecordArithOperator<internal::DivideAssign>(*this, other);
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto operator%=(const T& other) -> VirtualRecord&
{
return internal::virtualRecordArithOperator<internal::ModuloAssign>(*this, other);
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator+(const VirtualRecord& vd, const T& t)
{
return copyVirtualRecordStack(vd) += t;
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator+(const T& t, const VirtualRecord& vd)
{
return vd + t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator-(const VirtualRecord& vd, const T& t)
{
return copyVirtualRecordStack(vd) -= t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator*(const VirtualRecord& vd, const T& t)
{
return copyVirtualRecordStack(vd) *= t;
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator*(const T& t, const VirtualRecord& vd)
{
return vd * t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator/(const VirtualRecord& vd, const T& t)
{
return copyVirtualRecordStack(vd) /= t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator%(const VirtualRecord& vd, const T& t)
{
return copyVirtualRecordStack(vd) %= t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const VirtualRecord& vd, const T& t) -> bool
{
return internal::virtualRecordRelOperator<std::equal_to<>>(vd, t);
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const T& t, const VirtualRecord& vd) -> bool
{
return vd == t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator!=(const VirtualRecord& vd, const T& t) -> bool
{
return internal::virtualRecordRelOperator<std::not_equal_to<>>(vd, t);
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator!=(const T& t, const VirtualRecord& vd) -> bool
{
return vd != t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator<(const VirtualRecord& vd, const T& t) -> bool
{
return internal::virtualRecordRelOperator<std::less<>>(vd, t);
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator<(const T& t, const VirtualRecord& vd) -> bool
{
return vd > t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator<=(const VirtualRecord& vd, const T& t) -> bool
{
return internal::virtualRecordRelOperator<std::less_equal<>>(vd, t);
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator<=(const T& t, const VirtualRecord& vd) -> bool
{
return vd >= t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator>(const VirtualRecord& vd, const T& t) -> bool
{
return internal::virtualRecordRelOperator<std::greater<>>(vd, t);
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator>(const T& t, const VirtualRecord& vd) -> bool
{
return vd < t;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE friend auto operator>=(const VirtualRecord& vd, const T& t) -> bool
{
return internal::virtualRecordRelOperator<std::greater_equal<>>(vd, t);
}
template<typename T, typename = std::enable_if_t<!is_VirtualRecord<T>>>
LLAMA_FN_HOST_ACC_INLINE friend auto operator>=(const T& t, const VirtualRecord& vd) -> bool
{
return vd <= t;
}
LLAMA_FN_HOST_ACC_INLINE auto asTuple()
{
return internal::asTupleImpl(*this, AccessibleRecordDim{});
}
LLAMA_FN_HOST_ACC_INLINE auto asTuple() const
{
return internal::asTupleImpl(*this, AccessibleRecordDim{});
}
LLAMA_FN_HOST_ACC_INLINE auto asFlatTuple()
{
return internal::asFlatTupleImpl(*this, AccessibleRecordDim{});
}
LLAMA_FN_HOST_ACC_INLINE auto asFlatTuple() const
{
return internal::asFlatTupleImpl(*this, AccessibleRecordDim{});
}
template<std::size_t I>
LLAMA_FN_HOST_ACC_INLINE auto get() -> decltype(auto)
{
return operator()(RecordCoord<I>{});
}
template<std::size_t I>
LLAMA_FN_HOST_ACC_INLINE auto get() const -> decltype(auto)
{
return operator()(RecordCoord<I>{});
}
template<typename TupleLike>
LLAMA_FN_HOST_ACC_INLINE auto loadAs() -> TupleLike
{
static_assert(
internal::isDirectListInitializableFromTuple<TupleLike, decltype(asFlatTuple())>,
"TupleLike must be constructible from as many values as this VirtualRecord recursively represents "
"like "
"this: TupleLike{values...}");
return internal::makeFromTuple<TupleLike>(
asFlatTuple(),
std::make_index_sequence<std::tuple_size_v<decltype(asFlatTuple())>>{});
}
template<typename TupleLike>
LLAMA_FN_HOST_ACC_INLINE auto loadAs() const -> TupleLike
{
static_assert(
internal::isDirectListInitializableFromTuple<TupleLike, decltype(asFlatTuple())>,
"TupleLike must be constructible from as many values as this VirtualRecord recursively represents "
"like "
"this: TupleLike{values...}");
return internal::makeFromTuple<TupleLike>(
asFlatTuple(),
std::make_index_sequence<std::tuple_size_v<decltype(asFlatTuple())>>{});
}
struct Loader
{
VirtualRecord& vd;
template<typename T>
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
LLAMA_FN_HOST_ACC_INLINE operator T()
{
return vd.loadAs<T>();
}
};
struct LoaderConst
{
const VirtualRecord& vd;
template<typename T>
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
LLAMA_FN_HOST_ACC_INLINE operator T() const
{
return vd.loadAs<T>();
}
};
LLAMA_FN_HOST_ACC_INLINE auto load() -> Loader
{
return {*this};
}
LLAMA_FN_HOST_ACC_INLINE auto load() const -> LoaderConst
{
return {*this};
}
template<typename TupleLike>
LLAMA_FN_HOST_ACC_INLINE void store(const TupleLike& t)
{
internal::assignTuples(asTuple(), t, std::make_index_sequence<std::tuple_size_v<TupleLike>>{});
}
// swap for equal VirtualRecord
LLAMA_FN_HOST_ACC_INLINE friend void swap(
std::conditional_t<OwnView, VirtualRecord&, VirtualRecord> a,
std::conditional_t<OwnView, VirtualRecord&, VirtualRecord> b) noexcept
{
forEachLeafCoord<AccessibleRecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE
{
using std::swap;
swap(a(rc), b(rc));
});
}
};
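// Usage sketch (illustrative comment, reusing the hypothetical Particle/X/Y names from the sketches above):
//
//     auto vr = view(7);                       // VirtualRecord for array index {7}
//     vr(X{}) = 1.0f;                          // leaf access by tag returns a reference into the view's storage
//     auto [x, y] = vr;                        // structured bindings via the std::tuple_* specializations below
//     std::tuple<float, float> t = vr.load();  // or: vr.loadAs<std::tuple<float, float>>()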
// swap for heterogeneous VirtualRecord
template<
typename ViewA,
typename BoundRecordDimA,
bool OwnViewA,
typename ViewB,
typename BoundRecordDimB,
bool OwnViewB>
LLAMA_FN_HOST_ACC_INLINE auto swap(
VirtualRecord<ViewA, BoundRecordDimA, OwnViewA>& a,
VirtualRecord<ViewB, BoundRecordDimB, OwnViewB>& b) noexcept
-> std::enable_if_t<std::is_same_v<
typename VirtualRecord<ViewA, BoundRecordDimA, OwnViewA>::AccessibleRecordDim,
typename VirtualRecord<ViewB, BoundRecordDimB, OwnViewB>::AccessibleRecordDim>>
{
using LeftRecord = VirtualRecord<ViewA, BoundRecordDimA, OwnViewA>;
forEachLeafCoord<typename LeftRecord::AccessibleRecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE
{
using std::swap;
swap(a(rc), b(rc));
});
}
template<typename View, typename BoundRecordCoord, bool OwnView>
auto operator<<(std::ostream& os, const VirtualRecord<View, BoundRecordCoord, OwnView>& vr) -> std::ostream&
{
using RecordDim = typename VirtualRecord<View, BoundRecordCoord, OwnView>::AccessibleRecordDim;
os << "{";
// TODO(bgruber): I tried refactoring both branches into one, but MSVC and icpc have trouble correctly discarding
// the untaken if constexpr branch and not instantiating templates inside it.
if constexpr(std::is_array_v<RecordDim>)
{
constexpr auto size = std::extent_v<RecordDim>;
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<size>>(
[&](auto ic)
{
constexpr std::size_t i = decltype(ic)::value;
os << '[' << i << ']' << ": " << vr(RecordCoord<i>{});
if(i + 1 < size)
os << ", ";
});
}
else
{
constexpr auto size = boost::mp11::mp_size<RecordDim>::value;
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<size>>(
[&](auto ic)
{
constexpr std::size_t i = decltype(ic)::value;
using Field = boost::mp11::mp_at_c<RecordDim, i>;
using Tag = GetFieldTag<Field>;
os << structName<Tag>() << ": " << vr(RecordCoord<i>{});
if(i + 1 < size)
os << ", ";
});
}
os << "}";
return os;
}
template<typename VirtualRecordFwd, typename Functor>
LLAMA_FN_HOST_ACC_INLINE constexpr void forEachLeaf(VirtualRecordFwd&& vr, Functor&& functor)
{
using VirtualRecord = std::remove_reference_t<VirtualRecordFwd>;
LLAMA_FORCE_INLINE_RECURSIVE
forEachLeafCoord<typename VirtualRecord::AccessibleRecordDim>(
[functor = std::forward<Functor>(functor), &vr = vr](auto rc)
LLAMA_LAMBDA_INLINE_WITH_SPECIFIERS(constexpr mutable) { std::forward<Functor>(functor)(vr(rc)); });
}
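// Usage sketch (not part of the library; "view" is assumed to be a llama::View): forEachLeaf visits every leaf
// value of a VirtualRecord, e.g. to zero all fields of one element:
//     llama::forEachLeaf(view(1u), [](auto&& leaf) { leaf = 0; });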
} // namespace llama
template<typename View, typename BoundRecordCoord, bool OwnView>
struct std::tuple_size<llama::VirtualRecord<View, BoundRecordCoord, OwnView>>
: boost::mp11::mp_size<typename llama::VirtualRecord<View, BoundRecordCoord, OwnView>::AccessibleRecordDim>
{
};
template<std::size_t I, typename View, typename BoundRecordCoord, bool OwnView>
struct std::tuple_element<I, llama::VirtualRecord<View, BoundRecordCoord, OwnView>>
{
using type = decltype(std::declval<llama::VirtualRecord<View, BoundRecordCoord, OwnView>>().template get<I>());
};
template<std::size_t I, typename View, typename BoundRecordCoord, bool OwnView>
struct std::tuple_element<I, const llama::VirtualRecord<View, BoundRecordCoord, OwnView>>
{
using type
= decltype(std::declval<const llama::VirtualRecord<View, BoundRecordCoord, OwnView>>().template get<I>());
};
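// With the std::tuple_size/std::tuple_element specializations above and VirtualRecord::get(), structured bindings
// work on a VirtualRecord. Sketch, assuming the accessible record dimension has exactly two fields:
//     auto&& [a, b] = view(1u);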
#if CAN_USE_RANGES
template<
typename ViewA,
typename BoundA,
bool OwnA,
typename ViewB,
typename BoundB,
bool OwnB,
template<class>
class TQual,
template<class>
class UQual>
struct std::basic_common_reference<
llama::VirtualRecord<ViewA, BoundA, OwnA>,
llama::VirtualRecord<ViewB, BoundB, OwnB>,
TQual,
UQual>
{
using type = std::enable_if_t<
std::is_same_v<
typename llama::VirtualRecord<ViewA, BoundA, OwnA>::AccessibleRecordDim,
typename llama::VirtualRecord<ViewB, BoundB, OwnB>::AccessibleRecordDim>,
llama::One<typename ViewA::RecordDim>>;
};
#endif
// ==
// == ./VirtualRecord.hpp ==
// ============================================================================
// #include <algorithm> // amalgamate: file already included
#include <stdexcept>
// #include <string> // amalgamate: file already included
namespace llama
{
// TODO(bgruber): expose blob allocator
/// An equivalent of std::vector<T> backed by a \ref View. In contrast to std::vector, elements are never
/// value-initialized and there is no strong exception guarantee.
/// WARNING: This class is experimental.
/// @tparam Mapping The mapping to be used for the underlying view. Needs to have 1 array dimension.
template<typename Mapping>
struct Vector
{
static_assert(Mapping::ArrayExtents::rank == 1, "llama::Vector only supports 1D mappings");
using ViewType = decltype(allocViewUninitialized<Mapping>());
using RecordDim = typename Mapping::RecordDim;
using iterator = decltype(std::declval<ViewType>().begin());
using value_type = typename iterator::value_type;
Vector() = default;
template<typename VirtualRecord = One<RecordDim>>
LLAMA_FN_HOST_ACC_INLINE explicit Vector(std::size_t count, const VirtualRecord& value = {})
{
reserve(count);
for(std::size_t i = 0; i < count; i++)
push_back(value);
}
template<typename Iterator>
LLAMA_FN_HOST_ACC_INLINE Vector(Iterator first, Iterator last)
{
if constexpr(std::is_same_v<
typename std::iterator_traits<Iterator>::iterator_category,
std::random_access_iterator_tag>)
reserve(std::distance(first, last));
for(; first != last; ++first)
push_back(*first);
}
Vector(const Vector& other) = default;
LLAMA_FN_HOST_ACC_INLINE Vector(Vector&& other) noexcept
{
swap(other);
}
auto operator=(const Vector& other) -> Vector& = default;
LLAMA_FN_HOST_ACC_INLINE auto operator=(Vector&& other) noexcept -> Vector&
{
swap(other);
return *this;
}
~Vector() = default;
// TODO(bgruber): assign
LLAMA_FN_HOST_ACC_INLINE auto at(std::size_t i) -> decltype(auto)
{
if(i >= m_size)
throw std::out_of_range{
"Index " + std::to_string(i) + " out of range [0:" + std::to_string(m_size) + "["};
return m_view(i);
}
LLAMA_FN_HOST_ACC_INLINE auto at(std::size_t i) const -> decltype(auto)
{
if(i >= m_size)
throw std::out_of_range{
"Index " + std::to_string(i) + " out of range [0:" + std::to_string(m_size) + "["};
return m_view(i);
}
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t i) -> decltype(auto)
{
return m_view(i);
}
LLAMA_FN_HOST_ACC_INLINE auto operator[](std::size_t i) const -> decltype(auto)
{
return m_view(i);
}
LLAMA_FN_HOST_ACC_INLINE auto front() -> decltype(auto)
{
return m_view(0);
}
LLAMA_FN_HOST_ACC_INLINE auto front() const -> decltype(auto)
{
return m_view(0);
}
LLAMA_FN_HOST_ACC_INLINE auto back() -> decltype(auto)
{
return m_view(m_size - 1);
}
LLAMA_FN_HOST_ACC_INLINE auto back() const -> decltype(auto)
{
return m_view(m_size - 1);
}
LLAMA_FN_HOST_ACC_INLINE auto begin() -> decltype(auto)
{
return m_view.begin();
}
LLAMA_FN_HOST_ACC_INLINE auto begin() const -> decltype(auto)
{
return m_view.begin();
}
LLAMA_FN_HOST_ACC_INLINE auto cbegin() -> decltype(auto)
{
return std::as_const(m_view).begin();
}
LLAMA_FN_HOST_ACC_INLINE auto cbegin() const -> decltype(auto)
{
return m_view.begin();
}
LLAMA_FN_HOST_ACC_INLINE auto end() -> decltype(auto)
{
return m_view.begin() + m_size;
}
LLAMA_FN_HOST_ACC_INLINE auto end() const -> decltype(auto)
{
return m_view.begin() + m_size;
}
LLAMA_FN_HOST_ACC_INLINE auto cend() -> decltype(auto)
{
return std::as_const(m_view).begin() + m_size;
}
LLAMA_FN_HOST_ACC_INLINE auto cend() const -> decltype(auto)
{
return m_view.begin() + m_size;
}
LLAMA_FN_HOST_ACC_INLINE auto empty() const -> bool
{
return m_size == 0;
}
LLAMA_FN_HOST_ACC_INLINE auto size() const -> std::size_t
{
return m_size;
}
LLAMA_FN_HOST_ACC_INLINE void reserve(std::size_t cap)
{
if(cap > capacity())
changeCapacity(cap);
}
LLAMA_FN_HOST_ACC_INLINE auto capacity() const -> std::size_t
{
return m_view.mapping().extents()[0];
}
LLAMA_FN_HOST_ACC_INLINE void shrink_to_fit()
{
changeCapacity(m_size);
}
LLAMA_FN_HOST_ACC_INLINE void clear()
{
m_size = 0;
}
template<typename T>
LLAMA_FN_HOST_ACC_INLINE auto insert(iterator pos, T&& t) -> iterator
{
const auto i = pos - begin();
reserve(m_size + 1); // might invalidate pos
pos = begin() + i;
std::copy_backward(pos, end(), end() + 1);
m_view[i] = std::forward<T>(t);
m_size++;
return pos;
}
// TODO(bgruber): more insert overloads
// TODO(bgruber): emplace
LLAMA_FN_HOST_ACC_INLINE auto erase(iterator pos) -> iterator
{
std::copy(pos + 1, end(), pos);
m_size--;
return pos;
}
// TODO(bgruber): more erase overloads
// TODO(bgruber): T here is probably a virtual record. We could also allow any struct that is storable to the
// view via VirtualRecord::store().
template<typename T>
LLAMA_FN_HOST_ACC_INLINE void push_back(T&& t)
{
if(const auto cap = capacity(); m_size == cap)
reserve(std::max(cap + cap / 2, m_size + 1));
m_view[m_size++] = std::forward<T>(t);
}
// TODO(bgruber): emplace_back
LLAMA_FN_HOST_ACC_INLINE void pop_back()
{
m_size--;
}
template<typename VirtualRecord = One<RecordDim>>
LLAMA_FN_HOST_ACC_INLINE void resize(std::size_t count, const VirtualRecord& value = {})
{
reserve(count);
for(std::size_t i = m_size; i < count; i++)
m_view[i] = value;
m_size = count;
}
LLAMA_FN_HOST_ACC_INLINE friend auto operator==(const Vector& a, const Vector& b) -> bool
{
if(a.m_size != b.m_size)
return false;
return std::equal(a.begin(), a.end(), b.begin());
}
LLAMA_FN_HOST_ACC_INLINE friend auto operator!=(const Vector& a, const Vector& b) -> bool
{
return !(a == b);
}
LLAMA_FN_HOST_ACC_INLINE friend auto operator<(const Vector& a, const Vector& b) -> bool
{
return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end());
}
LLAMA_FN_HOST_ACC_INLINE friend auto operator<=(const Vector& a, const Vector& b) -> bool
{
return !(b < a);
}
LLAMA_FN_HOST_ACC_INLINE friend auto operator>(const Vector& a, const Vector& b) -> bool
{
return b < a;
}
LLAMA_FN_HOST_ACC_INLINE friend auto operator>=(const Vector& a, const Vector& b) -> bool
{
return !(a < b);
}
LLAMA_FN_HOST_ACC_INLINE friend void swap(Vector& a, Vector& b) noexcept
{
a.swap(b);
}
private:
LLAMA_FN_HOST_ACC_INLINE void changeCapacity(std::size_t cap)
{
auto newView = allocViewUninitialized<Mapping>(Mapping{typename Mapping::ArrayExtents{cap}});
auto b = begin();
std::copy(begin(), b + std::min(m_size, cap), newView.begin());
using std::swap;
swap(m_view, newView); // depends on move semantic of View
}
LLAMA_FN_HOST_ACC_INLINE void swap(Vector& other) noexcept
{
using std::swap;
swap(m_view, other.m_view); // depends on move semantic of View
swap(m_size, other.m_size);
}
ViewType m_view = {};
std::size_t m_size = 0;
};
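// Usage sketch (not part of the library; the extents type, the record dimension Vec and its tag X are assumptions):
//     using Mapping = llama::mapping::AoS<llama::ArrayExtentsDynamic<1>, Vec>; // any 1D mapping works
//     llama::Vector<Mapping> v(10);    // 10 default-constructed elements
//     v.push_back(llama::One<Vec>{});  // grows like std::vector
//     v[3](X{}) = 1.0f;                // element access yields a VirtualRecord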
} // namespace llama
// ==
// == ./Vector.hpp ==
// ============================================================================
// ============================================================================
// == ./Copy.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "View.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/AoSoA.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
// #include <limits> // amalgamate: file already included
namespace llama::mapping
{
/// The maximum number of vector lanes that can be used to fetch each leaf type in the record dimension into a
/// vector register of the given size in bits.
template<typename RecordDim, std::size_t VectorRegisterBits>
inline constexpr std::size_t maxLanes = []() constexpr
{
auto max = std::numeric_limits<std::size_t>::max();
forEachLeafCoord<RecordDim>(
[&](auto rc)
{
using AttributeType = GetType<RecordDim, decltype(rc)>;
max = std::min(max, VectorRegisterBits / (sizeof(AttributeType) * CHAR_BIT));
});
return max;
}
();
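// Example: for a record dimension whose leaves are all float (32 bit), maxLanes<RecordDim, 256> is 8; adding a
// double (64 bit) leaf to the same record dimension lowers it to 4, since the minimum over all leaves is taken.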
/// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView.
/// \tparam Lanes The size of the inner arrays of this array of struct of arrays.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
/// \ref FlattenRecordDimMinimizePadding.
template<
typename TArrayExtents,
typename TRecordDim,
std::size_t Lanes,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder>
struct AoSoA : private TArrayExtents
{
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
static constexpr std::size_t blobCount = 1;
constexpr AoSoA() = default;
LLAMA_FN_HOST_ACC_INLINE constexpr explicit AoSoA(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents)
{
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return ArrayExtents{*this};
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t) const -> std::size_t
{
return roundUpToMultiple(
LinearizeArrayDimsFunctor{}.size(extents()) * sizeOf<RecordDim>,
Lanes * sizeOf<RecordDim>);
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const
-> NrAndOffset
{
constexpr std::size_t flatFieldIndex =
#ifdef __NVCC__
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
const auto flatArrayIndex = LinearizeArrayDimsFunctor{}(ai, extents());
const auto blockIndex = flatArrayIndex / Lanes;
const auto laneIndex = flatArrayIndex % Lanes;
const auto offset = (sizeOf<RecordDim> * Lanes) * blockIndex
+ flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, false> * Lanes
+ sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>) * laneIndex;
return {0, offset};
}
private:
using Flattener = FlattenRecordDim<TRecordDim>;
};
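// Usage sketch (not part of the library; the extents type and the record dimension Vec are assumptions):
//     using Mapping = llama::mapping::AoSoA<llama::ArrayExtentsDynamic<1>, Vec, 8>; // blocks of 8 lanes
//     auto view = llama::allocView(Mapping{llama::ArrayExtentsDynamic<1>{1024}});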
template<std::size_t Lanes, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
struct PreconfiguredAoSoA
{
template<typename ArrayExtents, typename RecordDim>
using type = AoSoA<ArrayExtents, RecordDim, Lanes, LinearizeArrayDimsFunctor>;
};
template<typename Mapping>
inline constexpr bool isAoSoA = false;
template<typename AD, typename RD, std::size_t L>
inline constexpr bool isAoSoA<AoSoA<AD, RD, L>> = true;
} // namespace llama::mapping
// ==
// == ./mapping/AoSoA.hpp ==
// ============================================================================
// ============================================================================
// == ./mapping/SoA.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
// #include <limits> // amalgamate: file already included
namespace llama::mapping
{
/// Struct of array mapping. Used to create a \ref View via \ref allocView.
/// \tparam SeparateBuffers If true, every element of the record dimension is mapped to its own buffer.
/// \tparam LinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
/// how big the linear domain gets.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened if SeparateBuffers is
/// false. See \ref FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref
/// FlattenRecordDimDecreasingAlignment and \ref FlattenRecordDimMinimizePadding.
template<
typename TArrayExtents,
typename TRecordDim,
bool SeparateBuffers = true,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDimSingleBlob = FlattenRecordDimInOrder>
struct SoA : private TArrayExtents
{
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
static constexpr std::size_t blobCount
= SeparateBuffers ? boost::mp11::mp_size<FlatRecordDim<RecordDim>>::value : 1;
constexpr SoA() = default;
LLAMA_FN_HOST_ACC_INLINE
constexpr explicit SoA(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents)
{
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return ArrayExtents{*this};
}
LLAMA_FN_HOST_ACC_INLINE
constexpr auto blobSize([[maybe_unused]] std::size_t blobIndex) const -> std::size_t
{
if constexpr(SeparateBuffers)
{
constexpr Array<std::size_t, blobCount> typeSizes = []() constexpr
{
Array<std::size_t, blobCount> r{};
forEachLeafCoord<RecordDim>([&r, i = 0](auto rc) mutable constexpr
{ r[i++] = sizeof(GetType<RecordDim, decltype(rc)>); });
return r;
}
();
return LinearizeArrayDimsFunctor{}.size(extents()) * typeSizes[blobIndex];
}
else
{
return LinearizeArrayDimsFunctor{}.size(extents()) * sizeOf<RecordDim>;
}
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ad, RecordCoord<RecordCoords...> = {}) const
-> NrAndOffset
{
if constexpr(SeparateBuffers)
{
constexpr auto blob = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>;
const auto offset = LinearizeArrayDimsFunctor{}(ad, extents())
* sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>);
return {blob, offset};
}
else
{
constexpr std::size_t flatFieldIndex =
#ifdef __NVCC__
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
const auto offset = LinearizeArrayDimsFunctor{}(ad, extents())
* sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>)
+ flatOffsetOf<
typename Flattener::FlatRecordDim,
flatFieldIndex,
false> * LinearizeArrayDimsFunctor{}.size(extents());
return {0, offset};
}
}
private:
using Flattener = FlattenRecordDimSingleBlob<TRecordDim>;
};
/// Struct of array mapping storing the entire layout in a single blob.
/// \see SoA
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using SingleBlobSoA = SoA<ArrayExtents, RecordDim, false, LinearizeArrayDimsFunctor>;
/// Struct of array mapping storing each attribute of the record dimension in a separate blob.
/// \see SoA
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using MultiBlobSoA = SoA<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor>;
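// Usage sketch (not part of the library; the extents type and the record dimension Vec are assumptions):
//     using Mapping = llama::mapping::MultiBlobSoA<llama::ArrayExtentsDynamic<1>, Vec>; // one blob per leaf field
//     auto view = llama::allocView(Mapping{llama::ArrayExtentsDynamic<1>{1024}});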
template<bool SeparateBuffers = true, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
struct PreconfiguredSoA
{
template<typename ArrayExtents, typename RecordDim>
using type = SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>;
};
template<typename Mapping>
inline constexpr bool isSoA = false;
template<typename ArrayExtents, typename RecordDim, bool SeparateBuffers, typename LinearizeArrayDimsFunctor>
inline constexpr bool isSoA<SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>> = true;
} // namespace llama::mapping
// ==
// == ./mapping/SoA.hpp ==
// ============================================================================
#include <cstring>
#include <numeric>
namespace llama
{
namespace internal
{
template<typename RecordDim>
void assertTrivialCopyable()
{
forEachLeafCoord<RecordDim>(
[](auto rc)
{
static_assert(
std::is_trivially_copyable_v<GetType<RecordDim, decltype(rc)>>,
"All types in the record dimension must be trivially copyable");
});
}
using memcopyFunc = void* (*) (void*, const void*, std::size_t);
inline void parallel_memcpy(
std::byte* dst,
const std::byte* src,
std::size_t size,
std::size_t threadId = 0,
std::size_t threadCount = 1,
memcopyFunc singleThreadMemcpy = std::memcpy)
{
const auto sizePerThread = size / threadCount;
const auto sizeLastThread = sizePerThread + size % threadCount;
const auto sizeThisThread = threadId == threadCount - 1 ? sizeLastThread : sizePerThread;
singleThreadMemcpy(dst + threadId * sizePerThread, src + threadId * sizePerThread, sizeThisThread);
}
} // namespace internal
/// Direct memcpy from source view blobs to destination view blobs. Both views need to have the same mappings with
/// the same array dimensions.
/// @param threadId Optional. Zero-based id of calling thread for multi-threaded invocations.
/// @param threadCount Optional. Thread count in case of multi-threaded invocation.
template<typename Mapping, typename SrcBlob, typename DstBlob>
void blobMemcpy(
const View<Mapping, SrcBlob>& srcView,
View<Mapping, DstBlob>& dstView,
std::size_t threadId = 0,
std::size_t threadCount = 1)
{
internal::assertTrivialCopyable<typename Mapping::RecordDim>();
// TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions
if(srcView.mapping().extents() != dstView.mapping().extents())
throw std::runtime_error{"Array dimensions sizes are different"};
// TODO(bgruber): this is maybe not the best parallel copying strategy
for(std::size_t i = 0; i < Mapping::blobCount; i++)
internal::parallel_memcpy(
&dstView.storageBlobs[i][0],
&srcView.storageBlobs[i][0],
dstView.mapping().blobSize(i),
threadId,
threadCount);
}
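// Usage sketch (not part of the library; srcView/dstView are assumed views over the same mapping type and extents):
//     llama::blobMemcpy(srcView, dstView);                   // single-threaded
//     llama::blobMemcpy(srcView, dstView, tid, threadCount); // call from each of threadCount threads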
/// Field-wise copy from source to destination view. Both views need to have the same array and record dimensions.
/// @param threadId Optional. Thread id in case of multi-threaded copy.
/// @param threadCount Optional. Thread count in case of multi-threaded copy.
template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob>
void fieldWiseCopy(
const View<SrcMapping, SrcBlob>& srcView,
View<DstMapping, DstBlob>& dstView,
std::size_t threadId = 0,
std::size_t threadCount = 1)
{
// TODO(bgruber): think if we can remove this restriction
static_assert(
std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
"The source and destination record dimensions must be the same");
if(srcView.mapping().extents() != dstView.mapping().extents())
throw std::runtime_error{"Array dimensions sizes are different"};
auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE
{
forEachLeafCoord<typename DstMapping::RecordDim>([&](auto rc) LLAMA_LAMBDA_INLINE
{ dstView(ai)(rc) = srcView(ai)(rc); });
};
constexpr auto dims = SrcMapping::ArrayExtents::rank;
const auto extents = srcView.mapping().extents().toArray();
const auto workPerThread = (extents[0] + threadCount - 1) / threadCount;
const auto start = threadId * workPerThread;
const auto end = std::min((threadId + 1) * workPerThread, extents[0]);
for(auto i = start; i < end; i++)
{
if constexpr(dims > 1)
forEachADCoord(ArrayIndex<dims - 1>{pop_front(extents)}, copyOne, static_cast<std::size_t>(i));
else
copyOne(ArrayIndex<dims>{static_cast<std::size_t>(i)});
}
}
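// Usage sketch (not part of the library; srcView/dstView are assumed views sharing record dimension and extents,
// but possibly using different mappings):
//     llama::fieldWiseCopy(srcView, dstView);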
namespace internal
{
template<typename Mapping>
inline constexpr std::size_t aosoaLanes = 0;
template<typename ArrayExtents, typename RecordDim, bool SeparateBuffers, typename LinearizeArrayDimsFunctor>
inline constexpr std::size_t aosoaLanes<
mapping::SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>> = std::
numeric_limits<std::size_t>::max();
template<typename ArrayExtents, typename RecordDim, std::size_t Lanes, typename LinearizeArrayDimsFunctor>
inline constexpr std::size_t
aosoaLanes<mapping::AoSoA<ArrayExtents, RecordDim, Lanes, LinearizeArrayDimsFunctor>> = Lanes;
} // namespace internal
/// AoSoA copy strategy which transfers data in common blocks. At most one of the two mappings may be an SoA
/// mapping instead of an AoSoA mapping.
/// @param threadId Optional. Zero-based id of calling thread for multi-threaded invocations.
/// @param threadCount Optional. Thread count in case of multi-threaded invocation.
template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob>
void aosoaCommonBlockCopy(
const View<SrcMapping, SrcBlob>& srcView,
View<DstMapping, DstBlob>& dstView,
bool readOpt,
std::size_t threadId = 0,
std::size_t threadCount = 1)
{
// TODO(bgruber): think if we can remove this restriction
static_assert(
std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
"The source and destination record dimensions must be the same");
static_assert(
std::is_same_v<
typename SrcMapping::LinearizeArrayDimsFunctor,
typename DstMapping::LinearizeArrayDimsFunctor>,
"Source and destination mapping need to use the same array dimensions linearizer");
using RecordDim = typename SrcMapping::RecordDim;
internal::assertTrivialCopyable<RecordDim>();
[[maybe_unused]] static constexpr bool MBSrc = SrcMapping::blobCount > 1;
[[maybe_unused]] static constexpr bool MBDst = DstMapping::blobCount > 1;
static constexpr auto LanesSrc = internal::aosoaLanes<SrcMapping>;
static constexpr auto LanesDst = internal::aosoaLanes<DstMapping>;
if(srcView.mapping().extents() != dstView.mapping().extents())
throw std::runtime_error{"Array dimensions sizes are different"};
static constexpr auto srcIsAoSoA = LanesSrc != std::numeric_limits<std::size_t>::max();
static constexpr auto dstIsAoSoA = LanesDst != std::numeric_limits<std::size_t>::max();
static_assert(srcIsAoSoA || dstIsAoSoA, "At least one of the mappings must be an AoSoA mapping");
static_assert(
!srcIsAoSoA || std::tuple_size_v<decltype(srcView.storageBlobs)> == 1,
"Implementation assumes AoSoA with single blob");
static_assert(
!dstIsAoSoA || std::tuple_size_v<decltype(dstView.storageBlobs)> == 1,
"Implementation assumes AoSoA with single blob");
const auto flatSize = product(dstView.mapping().extents());
// TODO(bgruber): implement the following by adding additional copy loops for the remaining elements
if(!srcIsAoSoA && flatSize % LanesDst != 0)
throw std::runtime_error{"Source SoA mapping's total array elements must be evenly divisible by the "
"destination AoSoA Lane count."};
if(!dstIsAoSoA && flatSize % LanesSrc != 0)
throw std::runtime_error{"Destination SoA mapping's total array elements must be evenly divisible by the "
"source AoSoA Lane count."};
// the same as AoSoA::blobNrAndOffset but takes a flat array index
auto mapAoSoA = [](std::size_t flatArrayIndex, auto rc, std::size_t Lanes) LLAMA_LAMBDA_INLINE
{
const auto blockIndex = flatArrayIndex / Lanes;
const auto laneIndex = flatArrayIndex % Lanes;
const auto offset = (sizeOf<RecordDim> * Lanes) * blockIndex + offsetOf<RecordDim, decltype(rc)> * Lanes
+ sizeof(GetType<RecordDim, decltype(rc)>) * laneIndex;
return offset;
};
// the same as SoA::blobNrAndOffset but takes a flat array index
auto mapSoA = [&](std::size_t flatArrayIndex, auto rc, bool mb) LLAMA_LAMBDA_INLINE
{
const auto blob = mb * flatRecordCoord<RecordDim, decltype(rc)>;
const auto offset = !mb * offsetOf<RecordDim, decltype(rc)> * flatSize
+ sizeof(GetType<RecordDim, decltype(rc)>) * flatArrayIndex;
return NrAndOffset{blob, offset};
};
auto mapSrc = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE
{
if constexpr(srcIsAoSoA)
return &srcView.storageBlobs[0][0] + mapAoSoA(flatArrayIndex, rc, LanesSrc);
else
{
const auto [blob, off] = mapSoA(flatArrayIndex, rc, MBSrc);
return &srcView.storageBlobs[blob][off];
}
};
auto mapDst = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE
{
if constexpr(dstIsAoSoA)
return &dstView.storageBlobs[0][0] + mapAoSoA(flatArrayIndex, rc, LanesDst);
else
{
const auto [blob, off] = mapSoA(flatArrayIndex, rc, MBDst);
return &dstView.storageBlobs[blob][off];
}
};
static constexpr auto L = []
{
if constexpr(srcIsAoSoA && dstIsAoSoA)
return std::gcd(LanesSrc, LanesDst);
return std::min(LanesSrc, LanesDst);
}();
if(readOpt)
{
// optimized for linear reading
constexpr auto srcL = srcIsAoSoA ? LanesSrc : L;
const auto elementsPerThread = flatSize / srcL / threadCount * srcL;
{
const auto start = threadId * elementsPerThread;
const auto stop = threadId == threadCount - 1 ? flatSize : (threadId + 1) * elementsPerThread;
auto copyLBlock = [&](const std::byte*& threadSrc, std::size_t dstIndex, auto rc) LLAMA_LAMBDA_INLINE
{
constexpr auto bytes = L * sizeof(GetType<RecordDim, decltype(rc)>);
std::memcpy(mapDst(dstIndex, rc), threadSrc, bytes);
threadSrc += bytes;
};
if constexpr(srcIsAoSoA)
{
auto* threadSrc = mapSrc(start, RecordCoord<>{});
for(std::size_t i = start; i < stop; i += LanesSrc)
forEachLeafCoord<RecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE
{
for(std::size_t j = 0; j < LanesSrc; j += L)
copyLBlock(threadSrc, i + j, rc);
});
}
else
{
forEachLeafCoord<RecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE
{
auto* threadSrc = mapSrc(start, rc);
for(std::size_t i = start; i < stop; i += L)
copyLBlock(threadSrc, i, rc);
});
}
}
}
else
{
// optimized for linear writing
constexpr auto dstL = dstIsAoSoA ? LanesDst : L;
const auto elementsPerThread = flatSize / dstL / threadCount * dstL;
{
const auto start = threadId * elementsPerThread;
const auto stop = threadId == threadCount - 1 ? flatSize : (threadId + 1) * elementsPerThread;
auto copyLBlock = [&](std::byte*& threadDst, std::size_t srcIndex, auto rc) LLAMA_LAMBDA_INLINE
{
constexpr auto bytes = L * sizeof(GetType<RecordDim, decltype(rc)>);
std::memcpy(threadDst, mapSrc(srcIndex, rc), bytes);
threadDst += bytes;
};
if constexpr(dstIsAoSoA)
{
auto* threadDst = mapDst(start, RecordCoord<>{});
for(std::size_t i = start; i < stop; i += LanesDst)
forEachLeafCoord<RecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE
{
for(std::size_t j = 0; j < LanesDst; j += L)
copyLBlock(threadDst, i + j, rc);
});
}
else
{
forEachLeafCoord<RecordDim>(
[&](auto rc) LLAMA_LAMBDA_INLINE
{
auto* threadDst = mapDst(start, rc);
for(std::size_t i = start; i < stop; i += L)
copyLBlock(threadDst, i, rc);
});
}
}
}
}
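// Usage sketch (not part of the library; srcView/dstView are assumed views as described above): copy between an
// AoSoA view and an SoA (or differently sized AoSoA) view, choosing whether reads or writes are linear:
//     llama::aosoaCommonBlockCopy(srcView, dstView, true /*readOpt*/);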
/// @brief Generic implementation of \ref copy defaulting to \ref fieldWiseCopy. LLAMA provides several
/// specializations of this construct for specific mappings. Users are encouraged to also specialize this template
/// for further combinations of mappings if they can provide a better copy implementation.
template<typename SrcMapping, typename DstMapping, typename SFINAE = void>
struct Copy
{
template<typename SrcView, typename DstView>
void operator()(const SrcView& srcView, DstView& dstView, std::size_t threadId, std::size_t threadCount) const
{
fieldWiseCopy(srcView, dstView, threadId, threadCount);
}
};
template<typename Mapping>
struct Copy<Mapping, Mapping>
{
template<typename SrcView, typename DstView>
void operator()(const SrcView& srcView, DstView& dstView, std::size_t threadId, std::size_t threadCount) const
{
blobMemcpy(srcView, dstView, threadId, threadCount);
}
};
template<
typename ArrayExtents,
typename RecordDim,
typename LinearizeArrayDims,
std::size_t LanesSrc,
std::size_t LanesDst>
struct Copy<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>,
std::enable_if_t<LanesSrc != LanesDst>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, SrcBlob>& srcView,
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>, DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
{
constexpr auto readOpt = true; // TODO(bgruber): how to choose?
aosoaCommonBlockCopy(srcView, dstView, readOpt, threadId, threadCount);
}
};
template<
typename ArrayExtents,
typename RecordDim,
typename LinearizeArrayDims,
std::size_t LanesSrc,
bool DstSeparateBuffers>
struct Copy<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>,
mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, LinearizeArrayDims>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, SrcBlob>& srcView,
View<mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, LinearizeArrayDims>, DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
{
constexpr auto readOpt = true; // TODO(bgruber): how to choose?
aosoaCommonBlockCopy(srcView, dstView, readOpt, threadId, threadCount);
}
};
template<
typename ArrayExtents,
typename RecordDim,
typename LinearizeArrayDims,
std::size_t LanesDst,
bool SrcSeparateBuffers>
struct Copy<
mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, LinearizeArrayDims>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, LinearizeArrayDims>, SrcBlob>&
srcView,
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>, DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
{
constexpr auto readOpt = true; // TODO(bgruber): how to choose?
aosoaCommonBlockCopy(srcView, dstView, readOpt, threadId, threadCount);
}
};
/// Copy data from source view to destination view. Both views need to have the same array and record
/// dimensions. Delegates to \ref Copy to choose an implementation.
/// @param threadId Optional. Zero-based id of calling thread for multi-threaded invocations.
/// @param threadCount Optional. Thread count in case of multi-threaded invocation.
template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob>
void copy(
const View<SrcMapping, SrcBlob>& srcView,
View<DstMapping, DstBlob>& dstView,
std::size_t threadId = 0,
std::size_t threadCount = 1)
{
Copy<SrcMapping, DstMapping>{}(srcView, dstView, threadId, threadCount);
}
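// Usage sketch (not part of the library; srcView/dstView are assumed views with equal record dimension and
// extents): the generic entry point, which selects a strategy via the Copy specializations above:
//     llama::copy(srcView, dstView);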
} // namespace llama
// ==
// == ./Copy.hpp ==
// ============================================================================
// ============================================================================
// == ./DumpMapping.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
#if !__has_include(<fmt/format.h>)
# error DumpMapping.hpp requires the fmt library
#endif
// #include "ArrayIndexRange.hpp" // amalgamate: file already expanded
// #include "Core.hpp" // amalgamate: file already expanded
#include <boost/functional/hash.hpp>
#include <fmt/format.h>
// #include <string> // amalgamate: file already included
// #include <vector> // amalgamate: file already included
namespace llama
{
namespace internal
{
template<std::size_t... Coords>
auto toVec(RecordCoord<Coords...>) -> std::vector<std::size_t>
{
return {Coords...};
}
inline auto color(const std::vector<std::size_t>& recordCoord) -> std::size_t
{
auto c = boost::hash<std::vector<std::size_t>>{}(recordCoord) & 0xFFFFFF;
c |= 0x404040; // ensure color per channel is at least 0x40.
return c;
}
template<std::size_t Dim>
auto formatArrayIndex(const ArrayIndex<Dim>& ai)
{
if constexpr(Dim == 1)
return std::to_string(ai[0]);
else
{
std::string s = "{";
for(auto v : ai)
{
if(s.size() >= 2)
s += ",";
s += std::to_string(v);
}
s += "}";
return s;
}
}
template<std::size_t Dim>
struct FieldBox
{
ArrayIndex<Dim> arrayIndex;
std::vector<std::size_t> recordCoord;
std::string recordTags;
NrAndOffset nrAndOffset;
std::size_t size;
};
template<typename Mapping>
auto boxesFromMapping(const Mapping& mapping) -> std::vector<FieldBox<Mapping::ArrayIndex::rank>>
{
std::vector<FieldBox<Mapping::ArrayIndex::rank>> infos;
using RecordDim = typename Mapping::RecordDim;
for(auto ai : ArrayIndexRange{mapping.extents()})
{
forEachLeafCoord<RecordDim>(
[&](auto rc)
{
infos.push_back(
{ai,
internal::toVec(rc),
recordCoordTags<RecordDim>(rc),
mapping.blobNrAndOffset(ai, rc),
sizeof(GetType<RecordDim, decltype(rc)>)});
});
}
return infos;
}
template<std::size_t Dim>
auto breakBoxes(std::vector<FieldBox<Dim>> boxes, std::size_t wrapByteCount) -> std::vector<FieldBox<Dim>>
{
for(std::size_t i = 0; i < boxes.size(); i++)
{
auto& fb = boxes[i];
if(fb.nrAndOffset.offset / wrapByteCount != (fb.nrAndOffset.offset + fb.size - 1) / wrapByteCount)
{
const auto remainingSpace = wrapByteCount - fb.nrAndOffset.offset % wrapByteCount;
auto newFb = fb;
newFb.nrAndOffset.offset = fb.nrAndOffset.offset + remainingSpace;
newFb.size = fb.size - remainingSpace;
fb.size = remainingSpace;
boxes.push_back(newFb);
}
}
return boxes;
}
inline auto cssClass(std::string tags)
{
std::replace(begin(tags), end(tags), '.', '_');
std::replace(begin(tags), end(tags), '<', '_');
std::replace(begin(tags), end(tags), '>', '_');
return tags;
};
} // namespace internal
/// Returns an SVG image visualizing the memory layout created by the given mapping. The created memory blocks are
/// wrapped after wrapByteCount bytes.
template<typename Mapping>
auto toSvg(const Mapping& mapping, std::size_t wrapByteCount = 64, bool breakBoxes = true) -> std::string
{
constexpr auto byteSizeInPixel = 30;
constexpr auto blobBlockWidth = 60;
auto infos = internal::boxesFromMapping(mapping);
if(breakBoxes)
infos = internal::breakBoxes(std::move(infos), wrapByteCount);
std::string svg;
std::array<int, Mapping::blobCount + 1> blobYOffset{};
for(std::size_t i = 0; i < Mapping::blobCount; i++)
{
const auto blobRows = (mapping.blobSize(i) + wrapByteCount - 1) / wrapByteCount;
blobYOffset[i + 1] = blobYOffset[i] + (blobRows + 1) * byteSizeInPixel; // one row gap between blobs
const auto height = blobRows * byteSizeInPixel;
svg += fmt::format(
R"a(<rect x="0" y="{}" width="{}" height="{}" fill="#AAA" stroke="#000"/>
<text x="{}" y="{}" fill="#000" text-anchor="middle">Blob: {}</text>
)a",
blobYOffset[i],
blobBlockWidth,
height,
blobBlockWidth / 2,
blobYOffset[i] + height / 2,
i);
}
svg = fmt::format(
R"(<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg width="{}" height="{}" xmlns="http://www.w3.org/2000/svg">
<style>
.label {{ font: {}px sans-serif; }}
</style>
)",
blobBlockWidth + wrapByteCount * byteSizeInPixel,
blobYOffset.back() - byteSizeInPixel,
byteSizeInPixel / 2)
+ svg;
for(const auto& info : infos)
{
const auto blobY = blobYOffset[info.nrAndOffset.nr];
auto x = (info.nrAndOffset.offset % wrapByteCount) * byteSizeInPixel + blobBlockWidth;
auto y = (info.nrAndOffset.offset / wrapByteCount) * byteSizeInPixel + blobY;
const auto fill = internal::color(info.recordCoord);
const auto width = byteSizeInPixel * info.size;
constexpr auto cropBoxes = true;
if(cropBoxes)
{
svg += fmt::format(
R"(<svg x="{}" y="{}" width="{}" height="{}">
)",
x,
y,
width,
byteSizeInPixel);
x = 0;
y = 0;
}
svg += fmt::format(
R"(<rect x="{}" y="{}" width="{}" height="{}" fill="#{:X}" stroke="#000"/>
)",
x,
y,
width,
byteSizeInPixel,
fill);
for(std::size_t i = 1; i < info.size; i++)
{
svg += fmt::format(
R"(<line x1="{}" y1="{}" x2="{}" y2="{}" stroke="#777"/>
)",
x + i * byteSizeInPixel,
y + byteSizeInPixel * 2 / 3,
x + i * byteSizeInPixel,
y + byteSizeInPixel);
}
svg += fmt::format(
R"(<text x="{}" y="{}" fill="#000" text-anchor="middle" class="label">{} {}</text>
)",
x + width / 2,
y + byteSizeInPixel * 3 / 4,
internal::formatArrayIndex(info.arrayIndex),
info.recordTags);
if(cropBoxes)
svg += R"(</svg>
)";
}
svg += "</svg>";
return svg;
}
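// Usage sketch (not part of the library; the mapping instance and <fstream> are assumptions): write the SVG to a
// file for viewing in a browser:
//     std::ofstream{"layout.svg"} << llama::toSvg(mapping);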
/// Returns an HTML document visualizing the memory layout created by the given mapping. The visualization is
/// resizeable.
template<typename Mapping>
auto toHtml(const Mapping& mapping) -> std::string
{
constexpr auto byteSizeInPixel = 30;
constexpr auto rulerLengthInBytes = 512;
constexpr auto rulerByteInterval = 8;
auto infos = internal::boxesFromMapping(mapping);
std::stable_sort(
begin(infos),
end(infos),
[](const auto& a, const auto& b) {
return std::tie(a.nrAndOffset.nr, a.nrAndOffset.offset)
< std::tie(b.nrAndOffset.nr, b.nrAndOffset.offset);
});
infos.erase(
std::unique(
begin(infos),
end(infos),
[](const auto& a, const auto& b) { return a.nrAndOffset == b.nrAndOffset; }),
end(infos));
std::string html;
html += fmt::format(
R"(<!DOCTYPE html>
<html>
<head>
<style>
.box {{
outline: 1px solid;
display: inline-block;
white-space: nowrap;
height: {}px;
background: repeating-linear-gradient(90deg, #0000, #0000 29px, #777 29px, #777 30px);
text-align: center;
overflow: hidden;
vertical-align: middle;
}}
#ruler {{
background: repeating-linear-gradient(90deg, #0000, #0000 29px, #000 29px, #000 30px);
border-bottom: 1px solid;
height: 20px;
margin-bottom: 20px;
}}
#ruler div {{
position: absolute;
display: inline-block;
}}
)",
byteSizeInPixel);
using RecordDim = typename Mapping::RecordDim;
forEachLeafCoord<RecordDim>(
[&](auto rc)
{
constexpr int size = sizeof(GetType<RecordDim, decltype(rc)>);
html += fmt::format(
R"(.{} {{
width: {}px;
background-color: #{:X};
}}
)",
internal::cssClass(recordCoordTags<RecordDim>(rc)),
byteSizeInPixel * size,
internal::color(internal::toVec(rc)));
});
html += fmt::format(R"(</style>
</head>
<body>
<header id="ruler">
)");
for(auto i = 0; i < rulerLengthInBytes; i += rulerByteInterval)
html += fmt::format(
R"(<div style="margin-left: {}px;">{}</div>)",
i * byteSizeInPixel,
i);
html += fmt::format(R"(
</header>
)");
auto currentBlobNr = std::numeric_limits<std::size_t>::max();
for(const auto& info : infos)
{
if(currentBlobNr != info.nrAndOffset.nr)
{
currentBlobNr = info.nrAndOffset.nr;
html += fmt::format("<h1>Blob: {}</h1>", currentBlobNr);
}
html += fmt::format(
R"(<div class="box {0}" title="{1} {2}">{1} {2}</div>)",
internal::cssClass(info.recordTags),
internal::formatArrayIndex(info.arrayIndex),
info.recordTags);
}
html += R"(</body>
</html>)";
return html;
}
} // namespace llama
// ==
// == ./DumpMapping.hpp ==
// ============================================================================
// ============================================================================
// == ./llama.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
/// \mainpage LLAMA API documentation
///
/// LLAMA is a C++17 template header-only library for the abstraction of memory access patterns. It distinguishes
/// between the view of the algorithm on the memory and the real layout in the background. This enables performance
/// portability for multicore, manycore and GPU applications with the very same code.
///
/// In contrast to many other solutions, LLAMA can define nested data structures of arbitrary depth and is not
/// limited to struct-of-array and array-of-struct data layouts. It is also capable of explicitly defining padding,
/// blocking, striding and any other run-time or compile-time access pattern simultaneously.
///
/// To achieve this goal, LLAMA is split into mostly independent, orthogonal parts completely written in modern C++17
/// to run on as many architectures and with as many compilers as possible, while still supporting the extensions
/// needed e.g. to run on GPUs or other many-core hardware.
///
/// This page documents the API of LLAMA. The user documentation and an overview about the concepts and ideas can be
/// found here: https://llama-doc.rtfd.io
///
/// LLAMA is licensed under the LGPL3+.
#define LLAMA_VERSION_MAJOR 0
#define LLAMA_VERSION_MINOR 3
#define LLAMA_VERSION_PATCH 0
#ifdef __NVCC__
# pragma push
# if __CUDACC_VER_MAJOR__ * 1000 + __CUDACC_VER_MINOR__ >= 11005
# pragma nv_diag_suppress 940
# else
# pragma diag_suppress 940
# endif
#endif
// #include "ArrayExtents.hpp" // amalgamate: file already expanded
// #include "ArrayIndexRange.hpp" // amalgamate: file already expanded
// #include "BlobAllocators.hpp" // amalgamate: file already expanded
// #include "Copy.hpp" // amalgamate: file already expanded
// #include "Core.hpp" // amalgamate: file already expanded
// #include "Meta.hpp" // amalgamate: file already expanded
// #include "Vector.hpp" // amalgamate: file already expanded
// #include "View.hpp" // amalgamate: file already expanded
// #include "VirtualRecord.hpp" // amalgamate: file already expanded
// #include "macros.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/AoS.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
namespace llama::mapping
{
/// Array of struct mapping. Used to create a \ref View via \ref allocView.
/// \tparam AlignAndPad If true, padding bytes are inserted to guarantee that struct members are properly aligned.
/// If false, struct members are tightly packed.
/// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
/// how big the linear domain gets.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
/// \ref FlattenRecordDimMinimizePadding.
template<
typename TArrayExtents,
typename TRecordDim,
bool AlignAndPad = true,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder>
struct AoS : private TArrayExtents
{
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
static constexpr std::size_t blobCount = 1;
constexpr AoS() = default;
LLAMA_FN_HOST_ACC_INLINE
constexpr explicit AoS(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents)
{
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return *this;
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t) const -> std::size_t
{
return LinearizeArrayDimsFunctor{}.size(extents())
* flatSizeOf<typename Flattener::FlatRecordDim, AlignAndPad>;
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const
-> NrAndOffset
{
constexpr std::size_t flatFieldIndex =
#ifdef __NVCC__
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
const auto offset
= LinearizeArrayDimsFunctor{}(ai, extents())
* flatSizeOf<
typename Flattener::FlatRecordDim,
AlignAndPad> + flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, AlignAndPad>;
return {0, offset};
}
private:
using Flattener = FlattenRecordDim<TRecordDim>;
};
/// Array of struct mapping preserving the alignment of the field types by inserting padding.
/// \see AoS
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using AlignedAoS = AoS<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor>;
/// Array of struct mapping preserving the alignment of the field types by inserting padding and permuting the
/// field order to minimize this padding. \see AoS
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using MinAlignedAoS
= AoS<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor, FlattenRecordDimMinimizePadding>;
/// Array of struct mapping packing the field types tightly, violating the types' alignment requirements.
/// \see AoS
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using PackedAoS = AoS<ArrayExtents, RecordDim, false, LinearizeArrayDimsFunctor>;
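// Usage sketch (not part of the library; the extents type and the record dimension Vec are assumptions):
//     using Mapping = llama::mapping::AlignedAoS<llama::ArrayExtentsDynamic<1>, Vec>;
//     auto view = llama::allocView(Mapping{llama::ArrayExtentsDynamic<1>{1024}});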
template<bool AlignAndPad = true, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
struct PreconfiguredAoS
{
template<typename ArrayExtents, typename RecordDim>
using type = AoS<ArrayExtents, RecordDim, AlignAndPad, LinearizeArrayDimsFunctor>;
};
template<typename Mapping>
inline constexpr bool isAoS = false;
template<
typename ArrayExtents,
typename RecordDim,
bool AlignAndPad,
typename LinearizeArrayDimsFunctor,
template<typename>
typename FlattenRecordDim>
inline constexpr bool
isAoS<AoS<ArrayExtents, RecordDim, AlignAndPad, LinearizeArrayDimsFunctor, FlattenRecordDim>> = true;
} // namespace llama::mapping
// ==
// == ./mapping/AoS.hpp ==
// ============================================================================
// #include "mapping/AoSoA.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/Bytesplit.hpp ==
// ==
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
namespace llama::mapping
{
namespace internal
{
template<typename T>
using ReplaceByByteArray = std::byte[sizeof(T)];
template<typename RecordDim>
using SplitBytes = TransformLeaves<RecordDim, ReplaceByByteArray>;
} // namespace internal
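/// Meta mapping which replaces each leaf type T of the record dimension by a byte array std::byte[sizeof(T)] and
/// maps the resulting record dimension with the given inner mapping. All fields are computed: reads and writes go
/// through a proxy reference which assembles/disassembles the value byte by byte via the inner mapping.
/// \tparam InnerMapping The mapping applied to the byte-split record dimension.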
template<typename TArrayExtents, typename TRecordDim, template<typename, typename> typename InnerMapping>
struct Bytesplit : private InnerMapping<TArrayExtents, internal::SplitBytes<TRecordDim>>
{
using Inner = InnerMapping<TArrayExtents, internal::SplitBytes<TRecordDim>>;
using ArrayExtents = typename Inner::ArrayExtents;
using ArrayIndex = typename Inner::ArrayIndex;
using RecordDim = TRecordDim; // hide Inner::RecordDim
using Inner::blobCount;
using Inner::blobSize;
using Inner::extents;
using Inner::Inner;
LLAMA_FN_HOST_ACC_INLINE
constexpr explicit Bytesplit(TArrayExtents extents, TRecordDim = {}) : Inner(extents)
{
}
template<std::size_t... RecordCoords>
static constexpr auto isComputed(RecordCoord<RecordCoords...>)
{
return true;
}
template<typename QualifiedBase, typename RC, typename BlobArray>
struct Reference
{
QualifiedBase& innerMapping;
ArrayIndex ai;
BlobArray& blobs;
using DstType = GetType<TRecordDim, RC>;
// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
operator DstType() const
{
DstType v;
auto* p = reinterpret_cast<std::byte*>(&v);
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<sizeof(DstType)>>(
[&](auto ic)
{
constexpr auto i = decltype(ic)::value;
const auto [nr, off] = innerMapping.blobNrAndOffset(ai, Cat<RC, RecordCoord<i>>{});
p[i] = blobs[nr][off];
});
return v;
}
auto operator=(DstType v) -> Reference&
{
auto* p = reinterpret_cast<std::byte*>(&v);
boost::mp11::mp_for_each<boost::mp11::mp_iota_c<sizeof(DstType)>>(
[&](auto ic)
{
constexpr auto i = decltype(ic)::value;
const auto [nr, off] = innerMapping.blobNrAndOffset(ai, Cat<RC, RecordCoord<i>>{});
blobs[nr][off] = p[i];
});
return *this;
}
};
template<std::size_t... RecordCoords, typename BlobArray>
LLAMA_FN_HOST_ACC_INLINE constexpr auto compute(
typename Inner::ArrayIndex ai,
RecordCoord<RecordCoords...>,
BlobArray& blobs) const
{
return Reference<decltype(*this), RecordCoord<RecordCoords...>, BlobArray>{*this, ai, blobs};
}
};
} // namespace llama::mapping
// ==
// == ./mapping/Bytesplit.hpp ==
// ============================================================================
// ============================================================================
// == ./mapping/Heatmap.hpp ==
// ==
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
// #include <array> // amalgamate: file already included
#include <atomic>
#include <sstream>
// #include <vector> // amalgamate: file already included
namespace llama::mapping
{
/// Forwards all calls to the inner mapping. Counts all accesses made to each byte, so that a heatmap can be extracted.
/// \tparam Mapping The type of the inner mapping.
template<typename Mapping, typename CountType = std::size_t>
struct Heatmap
{
using ArrayExtents = typename Mapping::ArrayExtents;
using ArrayIndex = typename Mapping::ArrayIndex;
using RecordDim = typename Mapping::RecordDim;
static constexpr std::size_t blobCount = Mapping::blobCount;
constexpr Heatmap() = default;
LLAMA_FN_HOST_ACC_INLINE
explicit Heatmap(Mapping mapping) : mapping(mapping)
{
for(std::size_t i = 0; i < blobCount; i++)
byteHits[i] = std::vector<std::atomic<CountType>>(blobSize(i));
}
Heatmap(const Heatmap&) = delete;
auto operator=(const Heatmap&) -> Heatmap& = delete;
Heatmap(Heatmap&&) noexcept = default;
auto operator=(Heatmap&&) noexcept -> Heatmap& = default;
~Heatmap() = default;
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return mapping.extents();
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t i) const -> std::size_t
{
LLAMA_FORCE_INLINE_RECURSIVE
return mapping.blobSize(i);
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> rc = {}) const
-> NrAndOffset
{
const auto nao = mapping.blobNrAndOffset(ai, rc);
for(std::size_t i = 0; i < sizeof(GetType<RecordDim, RecordCoord<RecordCoords...>>); i++)
byteHits[nao.nr][nao.offset + i]++;
return nao;
}
auto toGnuplotScript(std::size_t wrapAfterBytes = 64) const -> std::string
{
std::stringstream f;
f << "#!/usr/bin/gnuplot -p\n$data << EOD\n";
for(std::size_t i = 0; i < blobCount; i++)
{
std::size_t byteCount = 0;
for(const auto& hits : byteHits[i])
f << hits << ((++byteCount % wrapAfterBytes == 0) ? '\n' : ' ');
while(byteCount++ % wrapAfterBytes != 0)
f << "0 ";
f << '\n';
}
f << R"(EOD
set view map
set xtics format ""
set x2tics autofreq 8
set yrange [] reverse
set link x2; set link y2
set ylabel "Cacheline"
set x2label "Byte"
plot $data matrix with image axes x2y1
)";
return f.str();
}
Mapping mapping;
mutable std::array<std::vector<std::atomic<CountType>>, blobCount> byteHits;
};
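// Usage sketch (not part of the library; innerMapping, runKernel and <fstream> are assumptions):
//     auto view = llama::allocView(llama::mapping::Heatmap{innerMapping});
//     runKernel(view); // hypothetical workload accessing the view
//     std::ofstream{"heatmap.sh"} << view.mapping().toGnuplotScript();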
} // namespace llama::mapping
// ==
// == ./mapping/Heatmap.hpp ==
// ============================================================================
// #include "mapping/One.hpp" // amalgamate: file already expanded
// #include "mapping/SoA.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/Split.hpp ==
// ==
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
namespace llama::mapping
{
namespace internal
{
template<typename... Fields, std::size_t FirstCoord, std::size_t... Coords>
auto partitionRecordDim(Record<Fields...>, RecordCoord<FirstCoord, Coords...>)
{
using namespace boost::mp11;
using Rec = Record<Fields...>;
if constexpr(sizeof...(Coords) == 0)
{
using Part1 = Record<mp_at_c<Rec, FirstCoord>>;
using Part2 = mp_erase_c<Rec, FirstCoord, FirstCoord + 1>;
return mp_list<Part1, Part2>{};
}
else
{
using FieldTag = GetTag<Rec, RecordCoord<FirstCoord>>;
using FieldType = GetType<Rec, RecordCoord<FirstCoord>>;
using InnerPartition = decltype(partitionRecordDim(FieldType{}, RecordCoord<Coords...>{}));
using Part1 = Record<Field<FieldTag, mp_first<InnerPartition>>>;
using Part2 = mp_replace_at_c<Rec, FirstCoord, Field<FieldTag, mp_second<InnerPartition>>>;
return mp_list<Part1, Part2>{};
}
}
template<typename Acc, typename TagList>
struct PartitionFoldOpImpl
{
using Part1Before = boost::mp11::mp_first<Acc>;
using Part2Before = boost::mp11::mp_second<Acc>;
using R = decltype(partitionRecordDim(Part2Before{}, GetCoordFromTags<Part2Before, TagList>{}));
using Part1After = boost::mp11::mp_first<R>;
using Part2After = boost::mp11::mp_second<R>;
using type = boost::mp11::mp_list<MergedRecordDims<Part1Before, Part1After>, Part2After>;
};
template<typename Acc, typename TagList>
using PartitionFoldOp = typename PartitionFoldOpImpl<Acc, TagList>::type;
template<typename... Fields, typename... RCs>
auto partitionRecordDim(Record<Fields...>, boost::mp11::mp_list<RCs...>)
{
using namespace boost::mp11;
using Initial = mp_list<Record<>, Record<Fields...>>; // initially, nothing selected for mapping 1
return mp_fold<mp_list<GetTags<Record<Fields...>, RCs>...>, Initial, PartitionFoldOp>{};
}
// workaround for nvcc 11.3 and below: we cannot put the decltype() directly into the Split class
template<typename RecordDim, typename RecordCoordForMapping1>
struct PartionedRecordDim
{
using type = decltype(partitionRecordDim(RecordDim{}, RecordCoordForMapping1{}));
};
template<typename RC, typename RecordCoordForMapping1>
inline constexpr bool isSelected = RecordCoordCommonPrefixIsSame<RecordCoordForMapping1, RC>;
template<typename RC>
struct IsSelectedPredicate
{
template<typename RecordCoordForMapping1>
using fn = boost::mp11::mp_bool<isSelected<RC, RecordCoordForMapping1>>;
};
template<typename RC, typename... RecordCoordsForMapping1>
inline constexpr bool isSelected<RC, boost::mp11::mp_list<RecordCoordsForMapping1...>> = boost::mp11::
mp_any_of_q<boost::mp11::mp_list<RecordCoordsForMapping1...>, IsSelectedPredicate<RC>>::value;
} // namespace internal
/// Mapping which splits off a part of the record dimension and maps it differently than the rest.
/// \tparam RecordCoordForMapping1 A \ref RecordCoord or a list of RecordCoords selecting the part of the record
/// dimension to be mapped differently.
/// \tparam MappingTemplate1 The mapping used for the selected part of the record dimension.
/// \tparam MappingTemplate2 The mapping used for the not selected part of the record dimension.
/// \tparam SeparateBlobs If true, both pieces of the record dimension are mapped to separate blobs.
template<
typename TArrayExtents,
typename TRecordDim,
typename RecordCoordForMapping1,
template<typename...>
typename MappingTemplate1,
template<typename...>
typename MappingTemplate2,
bool SeparateBlobs = false>
struct Split
{
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
using RecordDimPartitions = typename internal::PartionedRecordDim<RecordDim, RecordCoordForMapping1>::type;
using RecordDim1 = boost::mp11::mp_first<RecordDimPartitions>;
using RecordDim2 = boost::mp11::mp_second<RecordDimPartitions>;
using Mapping1 = MappingTemplate1<ArrayExtents, RecordDim1>;
using Mapping2 = MappingTemplate2<ArrayExtents, RecordDim2>;
static constexpr std::size_t blobCount = SeparateBlobs ? Mapping1::blobCount + Mapping2::blobCount : 1;
static_assert(SeparateBlobs || Mapping1::blobCount == 1);
static_assert(SeparateBlobs || Mapping2::blobCount == 1);
constexpr Split() = default;
LLAMA_FN_HOST_ACC_INLINE
constexpr explicit Split(ArrayExtents extents) : mapping1(extents), mapping2(extents)
{
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return mapping1.extents();
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize([[maybe_unused]] std::size_t i) const -> std::size_t
{
if constexpr(SeparateBlobs)
{
if(i < Mapping1::blobCount)
return mapping1.blobSize(i);
return mapping2.blobSize(i - Mapping1::blobCount);
}
else
return mapping1.blobSize(0) + mapping2.blobSize(0);
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const
-> NrAndOffset
{
using Tags = GetTags<RecordDim, RecordCoord<RecordCoords...>>;
if constexpr(internal::isSelected<RecordCoord<RecordCoords...>, RecordCoordForMapping1>)
return mapping1.blobNrAndOffset(ai, GetCoordFromTags<RecordDim1, Tags>{});
else
{
auto nrAndOffset = mapping2.blobNrAndOffset(ai, GetCoordFromTags<RecordDim2, Tags>{});
if constexpr(SeparateBlobs)
nrAndOffset.nr += Mapping1::blobCount;
else
{
for(std::size_t i = 0; i < Mapping1::blobCount; i++)
nrAndOffset.offset += mapping1.blobSize(i);
}
return nrAndOffset;
}
}
Mapping1 mapping1;
Mapping2 mapping2;
};
template<
typename RecordCoordsForMapping1,
template<typename...>
typename MappingTemplate1,
template<typename...>
typename MappingTemplate2,
bool SeparateBlobs = false>
struct PreconfiguredSplit
{
template<typename ArrayExtents, typename RecordDim>
using type = Split<
ArrayExtents,
RecordDim,
RecordCoordsForMapping1,
MappingTemplate1,
MappingTemplate2,
SeparateBlobs>;
};
} // namespace llama::mapping
// ==
// == ./mapping/Split.hpp ==
// ============================================================================
// ============================================================================
// == ./mapping/Trace.hpp ==
// ==
// #pragma once
// #include "Common.hpp" // amalgamate: file already expanded
// #include <atomic> // amalgamate: file already included
// #include <iostream> // amalgamate: file already included
// #include <string> // amalgamate: file already included
#include <unordered_map>
namespace llama::mapping
{
/// Forwards all calls to the inner mapping. Traces all accesses made through this mapping and prints a summary on
/// destruction.
/// \tparam Mapping The type of the inner mapping.
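    /// Example (a sketch, assuming an existing inner mapping instance `innerMapping`):
    /// \code
    ///     auto view = llama::allocView(llama::mapping::Trace{innerMapping});
    ///     // ... access the view as usual; a summary of per-field access counts is
    ///     // printed when the Trace mapping is destroyed together with the view
    /// \endcode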
template<typename Mapping>
struct Trace
{
using ArrayExtents = typename Mapping::ArrayExtents;
using ArrayIndex = typename Mapping::ArrayIndex;
using RecordDim = typename Mapping::RecordDim;
static constexpr std::size_t blobCount = Mapping::blobCount;
constexpr Trace() = default;
LLAMA_FN_HOST_ACC_INLINE
explicit Trace(Mapping mapping, bool printOnDestruction = true)
: mapping(mapping)
, printOnDestruction(printOnDestruction)
{
forEachLeafCoord<RecordDim>([&](auto rc) { fieldHits[recordCoordTags<RecordDim>(rc)] = 0; });
}
Trace(const Trace&) = delete;
auto operator=(const Trace&) -> Trace& = delete;
Trace(Trace&&) noexcept = default;
auto operator=(Trace&&) noexcept -> Trace& = default;
~Trace()
{
if(printOnDestruction && !fieldHits.empty())
print();
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
{
return mapping.extents();
}
LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(std::size_t i) const -> std::size_t
{
LLAMA_FORCE_INLINE_RECURSIVE
return mapping.blobSize(i);
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> rc = {}) const
-> NrAndOffset
{
const static auto name = recordCoordTags<RecordDim>(RecordCoord<RecordCoords...>{});
fieldHits.at(name)++;
LLAMA_FORCE_INLINE_RECURSIVE return mapping.blobNrAndOffset(ai, rc);
}
void print() const
{
std::cout << "Trace mapping, number of accesses:\n";
for(const auto& [k, v] : fieldHits)
std::cout << '\t' << k << ":\t" << v << '\n';
}
Mapping mapping;
mutable std::unordered_map<std::string, std::atomic<std::size_t>> fieldHits;
bool printOnDestruction;
};
} // namespace llama::mapping
// ==
// == ./mapping/Trace.hpp ==
// ============================================================================
// ============================================================================
// == ./mapping/tree/Mapping.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "../Common.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/tree/Functors.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// ============================================================================
// == ./mapping/tree/TreeFromDimensions.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "../../Core.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./Tuple.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "Meta.hpp" // amalgamate: file already expanded
// #include "macros.hpp" // amalgamate: file already expanded
namespace llama
{
template<typename... Elements>
struct Tuple
{
};
/// Tuple class like `std::tuple` but suitable for use with offloading devices like GPUs.
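    /// Example (a minimal sketch):
    /// \code
    ///     llama::Tuple t{1, 2.0f, 'x'}; // deduced as llama::Tuple<int, float, char>
    ///     llama::get<1>(t) = 3.0f;      // element access similar to std::get
    ///     static_assert(std::tuple_size_v<decltype(t)> == 3);
    /// \endcode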
template<typename TFirstElement, typename... Elements>
struct Tuple<TFirstElement, Elements...>
{
using FirstElement = TFirstElement;
using RestTuple = Tuple<Elements...>;
constexpr Tuple() = default;
/// Construct a tuple from values of the same types as the tuple stores.
LLAMA_FN_HOST_ACC_INLINE constexpr explicit Tuple(FirstElement first, Elements... rest)
: first(std::move(first))
, rest(std::move(rest)...)
{
}
        /// Construct a tuple from forwarded values of potentially different types than the tuple stores.
// SFINAE away this ctor if tuple elements cannot be constructed from ctor arguments
template<
typename T,
typename... Ts,
std::enable_if_t<
sizeof...(Elements) == sizeof...(Ts)
&& std::is_constructible_v<TFirstElement, T> && (std::is_constructible_v<Elements, Ts> && ...),
int> = 0>
LLAMA_FN_HOST_ACC_INLINE constexpr explicit Tuple(T&& firstArg, Ts&&... restArgs)
: first(std::forward<T>(firstArg))
, rest(std::forward<Ts>(restArgs)...)
{
}
FirstElement first; ///< the first element (if existing)
#ifndef __NVCC__
[[no_unique_address]] // nvcc 11.3 ICE
#endif
RestTuple rest; ///< the remaining elements
};
template<typename... Elements>
Tuple(Elements...) -> Tuple<std::remove_cv_t<std::remove_reference_t<Elements>>...>;
template<std::size_t Pos, typename... Elements>
LLAMA_FN_HOST_ACC_INLINE constexpr auto get(Tuple<Elements...>& tuple) -> auto&
{
if constexpr(Pos == 0)
return tuple.first;
else
return get<Pos - 1>(tuple.rest);
}
template<std::size_t Pos, typename... Elements>
LLAMA_FN_HOST_ACC_INLINE constexpr auto get(const Tuple<Elements...>& tuple) -> const auto&
{
if constexpr(Pos == 0)
return tuple.first;
else
return get<Pos - 1>(tuple.rest);
}
} // namespace llama
template<typename... Elements>
struct std::tuple_size<llama::Tuple<Elements...>>
{
static constexpr auto value = sizeof...(Elements);
};
template<std::size_t I, typename... Elements>
struct std::tuple_element<I, llama::Tuple<Elements...>>
{
using type = boost::mp11::mp_at_c<llama::Tuple<Elements...>, I>;
};
namespace llama
{
namespace internal
{
template<typename... Elements, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE constexpr auto areEqual(
const Tuple<Elements...>& a,
const Tuple<Elements...>& b,
std::index_sequence<Is...>) -> bool
{
return ((get<Is>(a) == get<Is>(b)) && ...);
}
} // namespace internal
template<typename... ElementsA, typename... ElementsB>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(const Tuple<ElementsA...>& a, const Tuple<ElementsB...>& b)
-> bool
{
using namespace boost::mp11;
if constexpr(sizeof...(ElementsA) == sizeof...(ElementsB))
if constexpr(mp_apply<mp_all, mp_transform<std::is_same, mp_list<ElementsA...>, mp_list<ElementsB...>>>::
value)
return internal::areEqual(a, b, std::make_index_sequence<sizeof...(ElementsA)>{});
return false;
}
template<typename... ElementsA, typename... ElementsB>
LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(const Tuple<ElementsA...>& a, const Tuple<ElementsB...>& b)
-> bool
{
return !(a == b);
}
namespace internal
{
template<typename Tuple1, typename Tuple2, size_t... Is1, size_t... Is2>
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleCatImpl(
const Tuple1& t1,
const Tuple2& t2,
std::index_sequence<Is1...>,
std::index_sequence<Is2...>)
{
return Tuple{get<Is1>(t1)..., get<Is2>(t2)...};
}
} // namespace internal
template<typename Tuple1, typename Tuple2>
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleCat(const Tuple1& t1, const Tuple2& t2)
{
return internal::tupleCatImpl(
t1,
t2,
std::make_index_sequence<std::tuple_size_v<Tuple1>>{},
std::make_index_sequence<std::tuple_size_v<Tuple2>>{});
}
namespace internal
{
template<std::size_t Pos, typename Tuple, typename Replacement>
struct TupleReplaceImpl
{
LLAMA_FN_HOST_ACC_INLINE
auto operator()(Tuple const tuple, Replacement const replacement)
{
return tupleCat(
llama::Tuple{tuple.first},
TupleReplaceImpl<Pos - 1, typename Tuple::RestTuple, Replacement>()(tuple.rest, replacement));
};
};
template<typename... Elements, typename Replacement>
struct TupleReplaceImpl<0, Tuple<Elements...>, Replacement>
{
LLAMA_FN_HOST_ACC_INLINE
auto operator()(Tuple<Elements...> tuple, Replacement const replacement)
{
return tupleCat(Tuple{replacement}, tuple.rest);
};
};
template<typename OneElement, typename Replacement>
struct TupleReplaceImpl<0, Tuple<OneElement>, Replacement>
{
LLAMA_FN_HOST_ACC_INLINE
auto operator()(Tuple<OneElement>, Replacement const replacement)
{
return Tuple{replacement};
}
};
} // namespace internal
/// Creates a copy of a tuple with the element at position Pos replaced by replacement.
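    /// Example (a minimal sketch):
    /// \code
    ///     llama::Tuple t{1, 2.0f, 'x'};
    ///     auto u = llama::tupleReplace<1>(t, 2.0); // u is a llama::Tuple<int, double, char>
    /// \endcode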
template<std::size_t Pos, typename Tuple, typename Replacement>
LLAMA_FN_HOST_ACC_INLINE auto tupleReplace(Tuple tuple, Replacement replacement)
{
return internal::TupleReplaceImpl<Pos, Tuple, Replacement>()(tuple, replacement);
}
namespace internal
{
template<size_t... Is, typename... Elements, typename Functor>
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleTransformHelper(
std::index_sequence<Is...>,
const Tuple<Elements...>& tuple,
const Functor& functor)
{
// FIXME(bgruber): nvcc fails to compile
// Tuple{functor(get<Is>(tuple))...}
return Tuple<decltype(functor(std::declval<Elements>()))...>{functor(get<Is>(tuple))...};
}
} // namespace internal
/// Applies a functor to every element of a tuple, creating a new tuple with the result of the element
/// transformations. The functor needs to implement a template `operator()` to which all tuple elements are passed.
// TODO(bgruber): replace by mp11 version in Boost 1.74.
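    /// Example (a minimal sketch):
    /// \code
    ///     llama::Tuple t{1, 2.0f};
    ///     auto doubled = llama::tupleTransform(t, [](auto v) { return v + v; }); // llama::Tuple<int, float>
    /// \endcode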
template<typename... Elements, typename Functor>
LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleTransform(const Tuple<Elements...>& tuple, const Functor& functor)
{
return internal::tupleTransformHelper(std::make_index_sequence<sizeof...(Elements)>{}, tuple, functor);
}
/// Returns a copy of the tuple without the first element.
template<typename... Elements>
LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_front(const Tuple<Elements...>& tuple)
{
return tuple.rest;
}
} // namespace llama
// ==
// == ./Tuple.hpp ==
// ============================================================================
// #include <cstddef> // amalgamate: file already included
// #include <string> // amalgamate: file already included
// #include <type_traits> // amalgamate: file already included
namespace llama::mapping::tree
{
template<typename T>
inline constexpr auto one = 1;
template<>
inline constexpr auto one<boost::mp11::mp_size_t<1>> = boost::mp11::mp_size_t<1>{};
template<typename TIdentifier, typename TType, typename CountType = std::size_t>
struct Leaf
{
using Identifier = TIdentifier;
using Type = TType;
const CountType count = one<CountType>;
};
template<typename TIdentifier, typename TChildrenTuple, typename CountType = std::size_t>
struct Node
{
using Identifier = TIdentifier;
using ChildrenTuple = TChildrenTuple;
const CountType count = one<CountType>;
const ChildrenTuple childs = {};
};
template<std::size_t ChildIndex = 0, typename ArrayIndexType = std::size_t>
struct TreeCoordElement
{
static constexpr boost::mp11::mp_size_t<ChildIndex> childIndex = {};
const ArrayIndexType arrayIndex = {};
};
template<std::size_t... Coords>
using TreeCoord = Tuple<TreeCoordElement<Coords, boost::mp11::mp_size_t<0>>...>;
namespace internal
{
template<typename... Coords, std::size_t... Is>
auto treeCoordToString(Tuple<Coords...> treeCoord, std::index_sequence<Is...>) -> std::string
{
auto s
= ((std::to_string(get<Is>(treeCoord).arrayIndex) + ":" + std::to_string(get<Is>(treeCoord).childIndex)
+ ", ")
+ ...);
s.resize(s.length() - 2);
return s;
}
} // namespace internal
template<typename TreeCoord>
auto treeCoordToString(TreeCoord treeCoord) -> std::string
{
return std::string("[ ")
+ internal::treeCoordToString(treeCoord, std::make_index_sequence<std::tuple_size_v<TreeCoord>>{})
+ std::string(" ]");
}
namespace internal
{
template<typename Tag, typename RecordDim, typename CountType>
struct CreateTreeElement
{
using type = Leaf<Tag, RecordDim, boost::mp11::mp_size_t<1>>;
};
template<typename Tag, typename... Fields, typename CountType>
struct CreateTreeElement<Tag, Record<Fields...>, CountType>
{
using type = Node<
Tag,
Tuple<
typename CreateTreeElement<GetFieldTag<Fields>, GetFieldType<Fields>, boost::mp11::mp_size_t<1>>::
type...>,
CountType>;
};
template<typename Tag, typename ChildType, std::size_t Count, typename CountType>
struct CreateTreeElement<Tag, ChildType[Count], CountType>
{
template<std::size_t... Is>
static auto createChildren(std::index_sequence<Is...>)
{
return Tuple<
typename CreateTreeElement<RecordCoord<Is>, ChildType, boost::mp11::mp_size_t<1>>::type...>{};
}
using type = Node<Tag, decltype(createChildren(std::make_index_sequence<Count>{})), CountType>;
};
template<typename Leaf, std::size_t Count>
struct WrapInNNodes
{
using type = Node<NoName, Tuple<typename WrapInNNodes<Leaf, Count - 1>::type>>;
};
template<typename Leaf>
struct WrapInNNodes<Leaf, 0>
{
using type = Leaf;
};
template<typename RecordDim>
using TreeFromRecordDimImpl = typename CreateTreeElement<NoName, RecordDim, std::size_t>::type;
} // namespace internal
template<typename RecordDim>
using TreeFromRecordDim = internal::TreeFromRecordDimImpl<RecordDim>;
template<typename ArrayExtents, typename RecordDim>
using TreeFromDimensions =
typename internal::WrapInNNodes<internal::TreeFromRecordDimImpl<RecordDim>, ArrayExtents::rank - 1>::type;
template<typename RecordDim, std::size_t N, std::size_t Pos = 0>
LLAMA_FN_HOST_ACC_INLINE auto createTree(const ArrayIndex<N>& size)
{
if constexpr(Pos == N - 1)
return TreeFromRecordDim<RecordDim>{size[N - 1]};
else
{
Tuple inner{createTree<RecordDim, N, Pos + 1>(size)};
return Node<NoName, decltype(inner)>{size[Pos], inner};
}
};
namespace internal
{
template<
typename ArrayIndex,
std::size_t... ADIndices,
std::size_t FirstRecordCoord,
std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE auto createTreeCoord(
const ArrayIndex& ai,
std::index_sequence<ADIndices...>,
RecordCoord<FirstRecordCoord, RecordCoords...>)
{
return Tuple{
TreeCoordElement<(ADIndices == ArrayIndex::rank - 1 ? FirstRecordCoord : 0)>{ai[ADIndices]}...,
TreeCoordElement<RecordCoords, boost::mp11::mp_size_t<0>>{}...,
TreeCoordElement<0, boost::mp11::mp_size_t<0>>{}};
}
} // namespace internal
template<typename RecordCoord, typename ArrayIndex>
LLAMA_FN_HOST_ACC_INLINE auto createTreeCoord(const ArrayIndex& ai)
{
return internal::createTreeCoord(ai, std::make_index_sequence<ArrayIndex::rank>{}, RecordCoord{});
}
} // namespace llama::mapping::tree
// ==
// == ./mapping/tree/TreeFromDimensions.hpp ==
// ============================================================================
namespace llama::mapping::tree::functor
{
/// Functor for \ref tree::Mapping. Does nothing with the mapping tree. Is used for testing.
struct Idem
{
template<typename Tree>
LLAMA_FN_HOST_ACC_INLINE auto basicToResult(const Tree& tree) const -> Tree
{
return tree;
}
template<typename Tree, typename TreeCoord>
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const TreeCoord& basicCoord, const Tree&) const
-> TreeCoord
{
return basicCoord;
}
template<typename Tree, typename TreeCoord>
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const TreeCoord& resultCoord, const Tree&) const
-> TreeCoord
{
return resultCoord;
}
};
/// Functor for \ref tree::Mapping. Moves all run time parts to the leaves, creating a SoA layout.
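    /// Usually supplied in the tree operation list of \ref tree::Mapping, e.g. as part of
    /// `llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT{}}` (a sketch, see the example at \ref tree::Mapping
    /// further down).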
struct LeafOnlyRT
{
template<typename Tree>
LLAMA_FN_HOST_ACC_INLINE auto basicToResult(Tree tree) const
{
return basicToResultImpl(tree, 1);
}
template<typename Tree, typename BasicCoord>
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const BasicCoord& basicCoord, const Tree& tree) const
{
return basicCoordToResultCoordImpl(basicCoord, tree);
}
template<typename Tree, typename ResultCoord>
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const ResultCoord& resultCoord, const Tree& /*tree*/)
const -> ResultCoord
{
return resultCoord;
}
private:
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE static auto basicToResultImpl(
const Node<Identifier, Type, CountType>& node,
std::size_t arraySize)
{
auto children = tupleTransform(
node.childs,
[&](auto element) { return basicToResultImpl(element, LLAMA_COPY(node.count) * arraySize); });
return Node<Identifier, decltype(children), boost::mp11::mp_size_t<1>>{{}, children};
}
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE static auto basicToResultImpl(
const Leaf<Identifier, Type, CountType>& leaf,
std::size_t arraySize)
{
return Leaf<Identifier, Type, std::size_t>{LLAMA_COPY(leaf.count) * arraySize};
}
template<typename BasicCoord, typename NodeOrLeaf>
LLAMA_FN_HOST_ACC_INLINE static auto basicCoordToResultCoordImpl(
const BasicCoord& basicCoord,
const NodeOrLeaf& nodeOrLeaf,
std::size_t arraySize = 0)
{
if constexpr(std::tuple_size_v<BasicCoord> == 1)
return Tuple{TreeCoordElement<BasicCoord::FirstElement::childIndex>{
arraySize + LLAMA_COPY(basicCoord.first.arrayIndex)}};
else
{
const auto& branch = get<BasicCoord::FirstElement::childIndex>(nodeOrLeaf.childs);
auto first = TreeCoordElement<BasicCoord::FirstElement::childIndex, boost::mp11::mp_size_t<0>>{};
return tupleCat(
Tuple{first},
basicCoordToResultCoordImpl(
basicCoord.rest,
branch,
(arraySize + LLAMA_COPY(basicCoord.first.arrayIndex)) * LLAMA_COPY(branch.count)));
}
}
};
namespace internal
{
template<typename TreeCoord, typename Node>
LLAMA_FN_HOST_ACC_INLINE auto getNode(const Node& node)
{
if constexpr(std::is_same_v<TreeCoord, Tuple<>>)
return node;
else
return getNode<typename TreeCoord::RestTuple>(get<TreeCoord::FirstElement::childIndex>(node.childs));
}
template<typename TreeCoord, typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto changeNodeRuntime(
const Node<Identifier, Type, CountType>& tree,
std::size_t newValue)
{
if constexpr(std::is_same_v<TreeCoord, Tuple<>>)
return Node<Identifier, Type>{newValue, tree.childs};
else
{
auto current = get<TreeCoord::FirstElement::childIndex>(tree.childs);
auto replacement = changeNodeRuntime<typename TreeCoord::RestTuple>(current, newValue);
auto children = tupleReplace<TreeCoord::FirstElement::childIndex>(tree.childs, replacement);
return Node<Identifier, decltype(children)>{tree.count, children};
}
}
template<typename TreeCoord, typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto changeNodeRuntime(
const Leaf<Identifier, Type, CountType>& /*tree*/,
std::size_t newValue)
{
return Leaf<Identifier, Type, std::size_t>{newValue};
}
struct ChangeNodeChildsRuntimeFunctor
{
const std::size_t newValue;
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto operator()(const Node<Identifier, Type, CountType>& element) const
{
return Node<Identifier, Type, std::size_t>{element.count * newValue, element.childs};
}
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto operator()(const Leaf<Identifier, Type, CountType>& element) const
{
return Leaf<Identifier, Type, std::size_t>{element.count * newValue};
}
};
template<typename TreeCoord, typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto changeNodeChildsRuntime(
const Node<Identifier, Type, CountType>& tree,
std::size_t newValue)
{
if constexpr(std::is_same_v<TreeCoord, Tuple<>>)
{
auto children = tupleTransform(tree.childs, ChangeNodeChildsRuntimeFunctor{newValue});
return Node<Identifier, decltype(children)>{tree.count, children};
}
else
{
auto current = get<TreeCoord::FirstElement::childIndex>(tree.childs);
auto replacement = changeNodeChildsRuntime<typename TreeCoord::RestTuple>(current, newValue);
auto children = tupleReplace<TreeCoord::FirstElement::childIndex>(tree.childs, replacement);
return Node<Identifier, decltype(children)>{tree.count, children};
}
}
template<typename TreeCoord, typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto changeNodeChildsRuntime(
const Leaf<Identifier, Type, CountType>& tree,
std::size_t /*newValue*/)
{
return tree;
}
} // namespace internal
    /// Functor for \ref tree::Mapping. Moves the run time part of a node one level down in the direction of the
    /// leaves by the given amount (a runtime or compile time value).
    /// \tparam TreeCoord Tree coordinate in the mapping tree whose run time part shall be moved down one level.
/// \see tree::Mapping
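    /// Example (a sketch; moves a runtime factor of 4 from the node at tree coordinate <0> one level down):
    /// \code
    ///     auto moveDown = llama::mapping::tree::functor::MoveRTDown<llama::mapping::tree::TreeCoord<0>>{4};
    /// \endcode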
template<typename TreeCoord, typename Amount = std::size_t>
struct MoveRTDown
{
const Amount amount = {};
template<typename Tree>
LLAMA_FN_HOST_ACC_INLINE auto basicToResult(const Tree& tree) const
{
return internal::changeNodeChildsRuntime<TreeCoord>(
internal::changeNodeRuntime<TreeCoord>(
tree,
// NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
(internal::getNode<TreeCoord>(tree).count + amount - 1) / amount),
amount);
}
template<typename Tree, typename BasicCoord>
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const BasicCoord& basicCoord, const Tree& tree) const
{
return basicCoordToResultCoordImpl<TreeCoord>(basicCoord, tree);
}
template<typename Tree, typename ResultCoord>
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const ResultCoord& resultCoord, const Tree&) const
-> ResultCoord
{
return resultCoord;
}
private:
template<typename InternalTreeCoord, typename BasicCoord, typename Tree>
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoordImpl(const BasicCoord& basicCoord, const Tree& tree) const
{
if constexpr(std::is_same_v<InternalTreeCoord, Tuple<>>)
{
if constexpr(std::is_same_v<BasicCoord, Tuple<>>)
return Tuple{};
else
{
const auto& childTree = get<BasicCoord::FirstElement::childIndex>(tree.childs);
const auto rt1 = basicCoord.first.arrayIndex / amount;
const auto rt2
= basicCoord.first.arrayIndex % amount * childTree.count + basicCoord.rest.first.arrayIndex;
auto rt1Child = TreeCoordElement<BasicCoord::FirstElement::childIndex>{rt1};
auto rt2Child = TreeCoordElement<BasicCoord::RestTuple::FirstElement::childIndex>{rt2};
return tupleCat(Tuple{rt1Child}, tupleCat(Tuple{rt2Child}, pop_front(basicCoord.rest)));
}
}
else
{
if constexpr(InternalTreeCoord::FirstElement::childIndex != BasicCoord::FirstElement::childIndex)
return basicCoord;
else
{
auto rest = basicCoordToResultCoordImpl<typename InternalTreeCoord::RestTuple>(
pop_front(basicCoord),
get<BasicCoord::FirstElement::childIndex>(tree.childs));
return tupleCat(Tuple{basicCoord.first}, rest);
}
}
}
};
template<typename TreeCoord, std::size_t Amount>
using MoveRTDownFixed = MoveRTDown<TreeCoord, boost::mp11::mp_size_t<Amount>>;
} // namespace llama::mapping::tree::functor
// ==
// == ./mapping/tree/Functors.hpp ==
// ============================================================================
// #include "TreeFromDimensions.hpp" // amalgamate: file already expanded
// ============================================================================
// == ./mapping/tree/toString.hpp ==
// ==
// Copyright 2018 Alexander Matthes
// SPDX-License-Identifier: GPL-3.0-or-later
// #pragma once
// #include "TreeFromDimensions.hpp" // amalgamate: file already expanded
// #include <boost/core/demangle.hpp> // amalgamate: file already included
// #include <string> // amalgamate: file already included
#include <typeinfo>
namespace llama::mapping::tree
{
template<typename T>
auto toString(T) -> std::string
{
return "Unknown";
}
// handles array indices
template<std::size_t I>
inline auto toString(RecordCoord<I>) -> std::string
{
return "";
}
inline auto toString(NoName) -> std::string
{
return "";
}
template<typename... Elements>
auto toString(Tuple<Elements...> tree) -> std::string
{
if constexpr(sizeof...(Elements) > 1)
return toString(tree.first) + " , " + toString(tree.rest);
else
return toString(tree.first);
}
namespace internal
{
inline void replace_all(std::string& str, const std::string& search, const std::string& replace)
{
std::string::size_type i = 0;
while((i = str.find(search, i)) != std::string::npos)
{
str.replace(i, search.length(), replace);
i += replace.length();
}
}
template<typename NodeOrLeaf>
auto countAndIdentToString(const NodeOrLeaf& nodeOrLeaf) -> std::string
{
auto r = std::to_string(nodeOrLeaf.count);
if constexpr(std::is_same_v<std::decay_t<decltype(nodeOrLeaf.count)>, std::size_t>)
r += "R"; // runtime
else
r += "C"; // compile time
r += std::string{" * "} + toString(typename NodeOrLeaf::Identifier{});
return r;
}
} // namespace internal
template<typename Identifier, typename Type, typename CountType>
auto toString(const Node<Identifier, Type, CountType>& node) -> std::string
{
return internal::countAndIdentToString(node) + "[ " + toString(node.childs) + " ]";
}
template<typename Identifier, typename Type, typename CountType>
auto toString(const Leaf<Identifier, Type, CountType>& leaf) -> std::string
{
auto raw = boost::core::demangle(typeid(Type).name());
#ifdef _MSC_VER
internal::replace_all(raw, " __cdecl(void)", "");
#endif
#ifdef __GNUG__
internal::replace_all(raw, " ()", "");
#endif
return internal::countAndIdentToString(leaf) + "(" + raw + ")";
}
} // namespace llama::mapping::tree
// ==
// == ./mapping/tree/toString.hpp ==
// ============================================================================
// #include <type_traits> // amalgamate: file already included
namespace llama::mapping::tree
{
namespace internal
{
template<typename Tree, typename TreeOperationList>
struct MergeFunctors
{
};
template<typename Tree, typename... Operations>
struct MergeFunctors<Tree, Tuple<Operations...>>
{
boost::mp11::mp_first<Tuple<Operations...>> operation = {};
using ResultTree = decltype(operation.basicToResult(Tree()));
ResultTree treeAfterOp;
MergeFunctors<ResultTree, boost::mp11::mp_drop_c<Tuple<Operations...>, 1>> next = {};
MergeFunctors() = default;
LLAMA_FN_HOST_ACC_INLINE
MergeFunctors(const Tree& tree, const Tuple<Operations...>& treeOperationList)
: operation(treeOperationList.first)
, treeAfterOp(operation.basicToResult(tree))
, next(treeAfterOp, pop_front(treeOperationList))
{
}
LLAMA_FN_HOST_ACC_INLINE
auto basicToResult(const Tree& tree) const
{
if constexpr(sizeof...(Operations) > 1)
return next.basicToResult(treeAfterOp);
else if constexpr(sizeof...(Operations) == 1)
return operation.basicToResult(tree);
else
return tree;
}
template<typename TreeCoord>
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(const TreeCoord& basicCoord, const Tree& tree) const
{
if constexpr(sizeof...(Operations) >= 1)
return next.basicCoordToResultCoord(
operation.basicCoordToResultCoord(basicCoord, tree),
treeAfterOp);
else
return basicCoord;
}
template<typename TreeCoord>
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(const TreeCoord& resultCoord, const Tree& tree) const
{
if constexpr(sizeof...(Operations) >= 1)
return next.resultCoordToBasicCoord(
operation.resultCoordToBasicCoord(resultCoord, tree),
operation.basicToResult(tree));
else
return resultCoord;
}
};
template<typename Tree>
struct MergeFunctors<Tree, Tuple<>>
{
MergeFunctors() = default;
LLAMA_FN_HOST_ACC_INLINE
MergeFunctors(const Tree&, const Tuple<>&)
{
}
LLAMA_FN_HOST_ACC_INLINE
auto basicToResult(const Tree& tree) const
{
return tree;
}
template<typename TreeCoord>
LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoord(TreeCoord const& basicCoord, Tree const& /*tree*/)
const -> TreeCoord
{
return basicCoord;
}
template<typename TreeCoord>
LLAMA_FN_HOST_ACC_INLINE auto resultCoordToBasicCoord(TreeCoord const& resultCoord, Tree const& /*tree*/)
const -> TreeCoord
{
return resultCoord;
}
};
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Node<Identifier, Type, CountType>& node) -> std::size_t;
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Leaf<Identifier, Type, CountType>& leaf) -> std::size_t;
template<typename... Children, std::size_t... Is, typename Count>
LLAMA_FN_HOST_ACC_INLINE auto getChildrenBlobSize(
const Tuple<Children...>& childs,
std::index_sequence<Is...> /*ii*/,
const Count& count) -> std::size_t
{
return count * (getTreeBlobSize(get<Is>(childs)) + ...);
}
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Node<Identifier, Type, CountType>& node) -> std::size_t
{
constexpr std::size_t childCount = boost::mp11::mp_size<std::decay_t<decltype(node.childs)>>::value;
return getChildrenBlobSize(node.childs, std::make_index_sequence<childCount>{}, LLAMA_COPY(node.count));
}
template<typename Identifier, typename Type, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Leaf<Identifier, Type, CountType>& leaf) -> std::size_t
{
return leaf.count * sizeof(Type);
}
template<typename Childs, typename CountType>
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobSize(const Childs& childs, const CountType& count) -> std::size_t
{
return getTreeBlobSize(Node<NoName, Childs, CountType>{count, childs});
}
template<std::size_t MaxPos, typename Identifier, typename Type, typename CountType, std::size_t... Is>
LLAMA_FN_HOST_ACC_INLINE auto sumChildrenSmallerThan(
const Node<Identifier, Type, CountType>& node,
std::index_sequence<Is...>) -> std::size_t
{
return ((getTreeBlobSize(get<Is>(node.childs)) * (Is < MaxPos)) + ...);
}
template<typename Tree, typename... Coords>
LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobByte(const Tree& tree, const Tuple<Coords...>& treeCoord)
-> std::size_t
{
const auto firstArrayIndex = treeCoord.first.arrayIndex;
if constexpr(sizeof...(Coords) > 1)
{
constexpr auto firstChildIndex = decltype(treeCoord.first.childIndex)::value;
return getTreeBlobSize(tree.childs, firstArrayIndex)
+ sumChildrenSmallerThan<firstChildIndex>(
tree,
std::make_index_sequence<std::tuple_size_v<typename Tree::ChildrenTuple>>{})
+ getTreeBlobByte(get<firstChildIndex>(tree.childs), treeCoord.rest);
}
else
return sizeof(typename Tree::Type) * firstArrayIndex;
}
} // namespace internal
    /// An experimental attempt to provide a general purpose description of a mapping. Array and record
/// dimensions are represented by a compile time tree data structure. This tree is mapped into memory by means of a
/// breadth-first tree traversal. By specifying additional tree operations, the tree can be modified at compile
/// time before being mapped to memory.
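    /// Example (a sketch, assuming a record dimension `Particle` and an array extents value `extents` defined by the
    /// user):
    /// \code
    ///     auto mapping = llama::mapping::tree::Mapping{
    ///         extents, llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT{}}, Particle{}};
    ///     auto view = llama::allocView(mapping);
    /// \endcode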
template<typename TArrayExtents, typename TRecordDim, typename TreeOperationList>
struct Mapping : private TArrayExtents
{
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
using BasicTree = TreeFromDimensions<ArrayExtents, RecordDim>;
        // TODO(bgruber): support more than one blob
static constexpr std::size_t blobCount = 1;
using MergedFunctors = internal::MergeFunctors<BasicTree, TreeOperationList>;
BasicTree basicTree;
MergedFunctors mergedFunctors;
using ResultTree = decltype(mergedFunctors.basicToResult(basicTree));
ResultTree resultTree;
Mapping() = default;
LLAMA_FN_HOST_ACC_INLINE
Mapping(ArrayExtents extents, TreeOperationList treeOperationList, RecordDim = {})
: ArrayExtents(extents)
, basicTree(createTree<RecordDim>(extents.toArray()))
, mergedFunctors(basicTree, treeOperationList)
, resultTree(mergedFunctors.basicToResult(basicTree))
{
}
LLAMA_FN_HOST_ACC_INLINE auto extents() const -> ArrayExtents
{
return ArrayExtents{*this};
}
LLAMA_FN_HOST_ACC_INLINE
auto blobSize(std::size_t const) const -> std::size_t
{
return internal::getTreeBlobSize(resultTree);
}
template<std::size_t... RecordCoords>
LLAMA_FN_HOST_ACC_INLINE auto blobNrAndOffset(ArrayIndex ai, RecordCoord<RecordCoords...> = {}) const
-> NrAndOffset
{
auto const basicTreeCoord = createTreeCoord<RecordCoord<RecordCoords...>>(ai);
auto const resultTreeCoord = mergedFunctors.basicCoordToResultCoord(basicTreeCoord, basicTree);
const auto offset = internal::getTreeBlobByte(resultTree, resultTreeCoord);
return {0, offset};
}
};
} // namespace llama::mapping::tree
// ==
// == ./mapping/tree/Mapping.hpp ==
// ============================================================================
#ifdef __NVCC__
# pragma pop
#endif
// ==
// == ./llama.hpp ==
// ============================================================================