-
-
Save iOrange/02be3523331eae9940e0b2bc0ce9da7e to your computer and use it in GitHub Desktop.
AES header-only library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once

// ---------------------------------------------------------------------------
// Compiler / target detection.
//
// Defines, for the rest of this header:
//   AES_COMPILER            - one of the AES_COMPILER_* identifiers below.
//   restrict_ptr            - the compiler's spelling of "restrict" (or empty).
//   OPT_AES_NI              - defined when the AES-NI intrinsics are usable.
//   ENABLE_AES_NI_ATTRIBUTE - function attribute that enables AES codegen for
//                             a single function (or empty when not needed).
//   LITTLE_ENDIAN           - defined on branches known to be little-endian.
// ---------------------------------------------------------------------------
#define AES_COMPILER_UNKNOWN 0
#define AES_COMPILER_MSVC 1
#define AES_COMPILER_GCC 2
#define AES_COMPILER_ICC 3
#define AES_COMPILER_CLANG 4

#if defined(__clang__)
    #define AES_COMPILER AES_COMPILER_CLANG
    #define restrict_ptr __restrict__
    #if (defined(__i386__) || defined(__x86_64__) || defined(__amd64__)) && (defined(__AES__) || (__clang_major__== 3 && __clang_minor__ >= 8) || (__clang_major__> 3)) && __has_include(<x86intrin.h>) && __has_include(<wmmintrin.h>) && __has_include(<cpuid.h>)
        #include <cpuid.h>
        #include <x86intrin.h>
        #include <wmmintrin.h>
        // System headers may already define LITTLE_ENDIAN (as a byte-order
        // constant); do not redefine it.
        #ifndef LITTLE_ENDIAN
            #define LITTLE_ENDIAN
        #endif
        #define OPT_AES_NI
        #if ((__clang_major__== 3 && __clang_minor__ >= 8) || (__clang_major__> 3))
            #define ENABLE_AES_NI_ATTRIBUTE __attribute__((target("aes")))
        #else
            #define ENABLE_AES_NI_ATTRIBUTE
        #endif
    #else
        // Only x86 targets can act on this hint; stay silent elsewhere so the
        // header does not emit a diagnostic on every non-x86 build.
        #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
            #pragma message("enable aes instructions")
        #endif
        #define ENABLE_AES_NI_ATTRIBUTE
    #endif
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    #define AES_COMPILER AES_COMPILER_ICC
    #define restrict_ptr __restrict__
    #if __INTEL_COMPILER >= 1110
        #include <x86intrin.h>
        #include <wmmintrin.h>
        #define OPT_AES_NI
        #include <cpuid.h>
        #ifndef LITTLE_ENDIAN
            #define LITTLE_ENDIAN
        #endif
    #endif
    #define ENABLE_AES_NI_ATTRIBUTE
#elif defined(__GNUC__) || defined(__GNUG__)
    #define AES_COMPILER AES_COMPILER_GCC
    #define restrict_ptr __restrict__
    #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
        #include <x86intrin.h>
        #include <wmmintrin.h>
        #define OPT_AES_NI
        #include <cpuid.h>
        #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
            // Enable AES only for the functions that carry the attribute. A
            // file-wide "#pragma GCC target" in a header would also change
            // code generation (and define __AES__) for everything that
            // follows the #include in the user's translation unit.
            #define ENABLE_AES_NI_ATTRIBUTE __attribute__((target("aes")))
        #else
            // Older GCC: fall back to the file-wide pragma.
            #pragma GCC target("aes")
            #define ENABLE_AES_NI_ATTRIBUTE
        #endif
    #else
        #define ENABLE_AES_NI_ATTRIBUTE
    #endif
#elif defined(_MSC_VER)
    #define AES_COMPILER AES_COMPILER_MSVC
    #define restrict_ptr __restrict
    #if (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)) && (_MSC_FULL_VER >= 150030729)
        #define OPT_AES_NI
        #include <intrin.h>
        #include <wmmintrin.h>
        #ifndef LITTLE_ENDIAN
            #define LITTLE_ENDIAN
        #endif
    #endif
    #define ENABLE_AES_NI_ATTRIBUTE
#else
    #define AES_COMPILER AES_COMPILER_UNKNOWN
    #define restrict_ptr
    #define ENABLE_AES_NI_ATTRIBUTE
#endif
namespace AES | |
{ | |
// Fixed-width integer aliases used throughout the cipher.
// The checks test the value range as well as the size: sizeof(unsigned char)
// is 1 by definition, so a size-only check cannot detect a non-8-bit byte.
typedef unsigned char aes_uint8_t;
static_assert(sizeof(aes_uint8_t) == 1 && (aes_uint8_t)(-1) == 0xFFu, "have to be 8 bits long");
typedef unsigned int aes_uint32_t;
static_assert(sizeof(aes_uint32_t) == 4 && (aes_uint32_t)(-1) == 0xFFFFFFFFu, "have to be 32 bits long");
#ifdef OPT_AES_NI | |
// #define _mm_loadu_si128 * | |
static bool checkAesNi() noexcept | |
{ | |
#if AES_COMPILER == AES_COMPILER_MSVC | |
int output[4] = { 0, 0, 0, 0 }; | |
__cpuid(output, 1); | |
return (output[2] & (1 << 25)) != 0; | |
#elif AES_COMPILER == AES_COMPILER_GCC || AES_COMPILER == AES_COMPILER_CLANG || AES_COMPILER == AES_COMPILER_ICC | |
unsigned int output[4] = { 0, 0, 0, 0 }; | |
__get_cpuid(1, &output[0], &output[1], &output[2], &output[3]); | |
return (output[2] & (1 << 25)) != 0; | |
#else | |
return false; | |
#endif | |
} | |
static bool haveAesNi() noexcept | |
{ | |
static /*thread_local*/ const bool ret = checkAesNi(); | |
return ret; | |
//return true; | |
} | |
#endif | |
template<unsigned int Nk, unsigned int Nr> | |
class AES | |
{ | |
static constexpr unsigned int Nb = 4; | |
alignas(16) aes_uint8_t m_roundKey[Nb * (Nr + 1) * 4]; | |
static inline aes_uint8_t SBOX(aes_uint8_t x) noexcept { | |
static const aes_uint8_t val[] = { | |
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, | |
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, | |
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, | |
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, | |
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, | |
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, | |
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, | |
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, | |
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, | |
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, | |
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, | |
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, | |
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, | |
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, | |
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, | |
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 | |
}; | |
return val[x]; | |
} | |
static inline aes_uint8_t SBOX_INV(aes_uint8_t x) noexcept { | |
static const aes_uint8_t val[] = { | |
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, | |
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, | |
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, | |
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, | |
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, | |
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, | |
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, | |
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, | |
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, | |
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, | |
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, | |
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, | |
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, | |
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, | |
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, | |
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d | |
}; | |
return val[x]; | |
} | |
static inline aes_uint8_t RCON(unsigned round) noexcept | |
{ | |
static const aes_uint8_t rcon_val[] = { | |
0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, | |
0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39 | |
}; | |
return aes_uint8_t(rcon_val[round]); | |
}; | |
static inline aes_uint32_t T_E4(aes_uint8_t x) noexcept { | |
static const aes_uint32_t val[] = { | |
0x00000000, 0x03010102, 0x06020204, 0x05030306, 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e, 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16, 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e, | |
0x30101020, 0x33111122, 0x36121224, 0x35131326, 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e, 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36, 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e, | |
0x60202040, 0x63212142, 0x66222244, 0x65232346, 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e, 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56, 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e, | |
0x50303060, 0x53313162, 0x56323264, 0x55333366, 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e, 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76, 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e, | |
0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386, 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e, 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96, 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e, | |
0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6, 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae, 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6, 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe, | |
0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6, 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce, 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6, 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde, | |
0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6, 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee, 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6, 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe, | |
0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d, 0x97848413, 0x94858511, 0x91868617, 0x92878715, 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d, 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05, | |
0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d, 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735, 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d, 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25, | |
0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d, 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755, 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d, 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45, | |
0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d, 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775, 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d, 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65, | |
0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d, 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795, 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d, 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85, | |
0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd, 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5, 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad, 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5, | |
0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd, 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5, 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd, 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5, | |
0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd, 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5, 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed, 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5, | |
}; | |
return val[(unsigned int)(x)]; | |
} | |
static inline aes_uint32_t T_D4(aes_uint8_t x) noexcept { | |
static const aes_uint32_t val[] = { | |
0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a, 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, | |
0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2, 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba, | |
0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9, 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, | |
0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411, 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, | |
0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf, 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7, | |
0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f, 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, | |
0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c, 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, | |
0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684, 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc, | |
0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23, 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, | |
0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b, 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, | |
0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88, 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0, | |
0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418, 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, | |
0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6, 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, | |
0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e, 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56, | |
0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255, 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, | |
0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, | |
}; | |
return val[(unsigned int)(x)]; | |
} | |
static inline aes_uint32_t rot(aes_uint32_t val, unsigned bits) | |
{ | |
return (val << bits) | (val >> (32 - bits)); | |
} | |
static inline void write(aes_uint8_t ret[4], aes_uint32_t val) | |
{ | |
#ifdef LITTLE_ENDIAN | |
*reinterpret_cast<aes_uint32_t*>(&ret[0]) = val; | |
#else | |
ret[0] = aes_uint8_t(val & 0xFF); | |
ret[1] = aes_uint8_t((val >> 8) & 0xFF); | |
ret[2] = aes_uint8_t((val >> 16) & 0xFF); | |
ret[3] = aes_uint8_t((val >> 24) & 0xFF); | |
#endif | |
} | |
static inline aes_uint32_t read(const aes_uint8_t val[4]) | |
{ | |
#ifdef LITTLE_ENDIAN | |
return *reinterpret_cast<const aes_uint32_t*>(&val[0]); | |
#else | |
return aes_uint32_t(val[0]) | (aes_uint32_t(val[1]) << 8) | (aes_uint32_t(val[2]) << 16) | (aes_uint32_t(val[3]) << 24); | |
#endif | |
} | |
static inline void MixColumn(aes_uint8_t ret[4], aes_uint8_t a, aes_uint8_t b, aes_uint8_t c, aes_uint8_t d, const aes_uint8_t * restrict_ptr roundKey) noexcept | |
{ | |
write(ret, aes_uint32_t(T_E4(a) ^ rot(T_E4(b), 8) ^ rot(T_E4(c), 16) ^ rot(T_E4(d), 24) ^ read(roundKey))); | |
} | |
static inline void InvMixColumn(aes_uint8_t ret[4], aes_uint8_t a, aes_uint8_t b, aes_uint8_t c, aes_uint8_t d) noexcept | |
{ | |
write(ret, aes_uint32_t(T_D4(a) ^ rot(T_D4(b), 8) ^ rot(T_D4(c), 16) ^ rot(T_D4(d), 24))); | |
} | |
static inline aes_uint8_t keyChar(const aes_uint8_t* key, unsigned int length, unsigned int i) noexcept | |
{ | |
return (i < length) ? key[i] : 0u; | |
} | |
static inline void KeyExpansion(aes_uint8_t roundKey[Nb * (Nr + 1) * 4], const aes_uint8_t* restrict_ptr key, unsigned int length) noexcept | |
{ | |
for (unsigned i = 0; i < Nk * 4; i++) | |
{ | |
roundKey[i] = keyChar(key, length, i); | |
} | |
for (unsigned i = Nk; i < Nb * (Nr + 1); i++) | |
{ | |
aes_uint8_t temp0 = roundKey[(i - 1) * 4 + 0]; | |
aes_uint8_t temp1 = roundKey[(i - 1) * 4 + 1]; | |
aes_uint8_t temp2 = roundKey[(i - 1) * 4 + 2]; | |
aes_uint8_t temp3 = roundKey[(i - 1) * 4 + 3]; | |
if (i % Nk == 0) | |
{ | |
aes_uint8_t temp = temp0; | |
temp0 = aes_uint8_t(SBOX(temp1) ^ RCON(i / Nk)); | |
temp1 = SBOX(temp2); | |
temp2 = SBOX(temp3); | |
temp3 = SBOX(temp); | |
} | |
else if ((Nk > 6) && (i % Nk == 4)) | |
{ | |
temp0 = SBOX(temp0); | |
temp1 = SBOX(temp1); | |
temp2 = SBOX(temp2); | |
temp3 = SBOX(temp3); | |
} | |
roundKey[i * 4 + 0] = aes_uint8_t(roundKey[(i - Nk) * 4 + 0] ^ temp0); | |
roundKey[i * 4 + 1] = aes_uint8_t(roundKey[(i - Nk) * 4 + 1] ^ temp1); | |
roundKey[i * 4 + 2] = aes_uint8_t(roundKey[(i - Nk) * 4 + 2] ^ temp2); | |
roundKey[i * 4 + 3] = aes_uint8_t(roundKey[(i - Nk) * 4 + 3] ^ temp3); | |
} | |
} | |
static inline void EncryptRound(aes_uint8_t* restrict_ptr stateTo, const aes_uint8_t* restrict_ptr stateFrom, const aes_uint8_t * restrict_ptr roundKey) noexcept | |
{ | |
MixColumn(&stateTo[0], SBOX(stateFrom[0]), SBOX(stateFrom[5]), SBOX(stateFrom[10]), SBOX(stateFrom[15]), &roundKey[0]); | |
MixColumn(&stateTo[4], SBOX(stateFrom[4]), SBOX(stateFrom[9]), SBOX(stateFrom[14]), SBOX(stateFrom[3]), &roundKey[4]); | |
MixColumn(&stateTo[8], SBOX(stateFrom[8]), SBOX(stateFrom[13]), SBOX(stateFrom[2]), SBOX(stateFrom[7]), &roundKey[8]); | |
MixColumn(&stateTo[12], SBOX(stateFrom[12]), SBOX(stateFrom[1]), SBOX(stateFrom[6]), SBOX(stateFrom[11]), &roundKey[12]); | |
} | |
static inline void EncryptRoundLast(aes_uint8_t* restrict_ptr stateTo, const aes_uint8_t* restrict_ptr stateFrom, const aes_uint8_t * restrict_ptr roundKey) noexcept | |
{ | |
stateTo[0] = aes_uint8_t(SBOX(stateFrom[0]) ^ roundKey[0]); | |
stateTo[1] = aes_uint8_t(SBOX(stateFrom[5]) ^ roundKey[1]); | |
stateTo[2] = aes_uint8_t(SBOX(stateFrom[10]) ^ roundKey[2]); | |
stateTo[3] = aes_uint8_t(SBOX(stateFrom[15]) ^ roundKey[3]); | |
stateTo[4] = aes_uint8_t(SBOX(stateFrom[4]) ^ roundKey[4]); | |
stateTo[5] = aes_uint8_t(SBOX(stateFrom[9]) ^ roundKey[5]); | |
stateTo[6] = aes_uint8_t(SBOX(stateFrom[14]) ^ roundKey[6]); | |
stateTo[7] = aes_uint8_t(SBOX(stateFrom[3]) ^ roundKey[7]); | |
stateTo[8] = aes_uint8_t(SBOX(stateFrom[8]) ^ roundKey[8]); | |
stateTo[9] = aes_uint8_t(SBOX(stateFrom[13]) ^ roundKey[9]); | |
stateTo[10] = aes_uint8_t(SBOX(stateFrom[2]) ^ roundKey[10]); | |
stateTo[11] = aes_uint8_t(SBOX(stateFrom[7]) ^ roundKey[11]); | |
stateTo[12] = aes_uint8_t(SBOX(stateFrom[12]) ^ roundKey[12]); | |
stateTo[13] = aes_uint8_t(SBOX(stateFrom[1]) ^ roundKey[13]); | |
stateTo[14] = aes_uint8_t(SBOX(stateFrom[6]) ^ roundKey[14]); | |
stateTo[15] = aes_uint8_t(SBOX(stateFrom[11]) ^ roundKey[15]); | |
} | |
static inline void DecryptRound(aes_uint8_t* restrict_ptr stateTo, const aes_uint8_t* restrict_ptr stateFrom, const aes_uint8_t * restrict_ptr roundKey) noexcept | |
{ | |
InvMixColumn(&stateTo[0], aes_uint8_t(SBOX_INV(stateFrom[0]) ^ roundKey[0]), aes_uint8_t(SBOX_INV(stateFrom[13]) ^ roundKey[1]), aes_uint8_t(SBOX_INV(stateFrom[10]) ^ roundKey[2]), aes_uint8_t(SBOX_INV(stateFrom[7]) ^ roundKey[3])); | |
InvMixColumn(&stateTo[4], aes_uint8_t(SBOX_INV(stateFrom[4]) ^ roundKey[4]), aes_uint8_t(SBOX_INV(stateFrom[1]) ^ roundKey[5]), aes_uint8_t(SBOX_INV(stateFrom[14]) ^ roundKey[6]), aes_uint8_t(SBOX_INV(stateFrom[11]) ^ roundKey[7])); | |
InvMixColumn(&stateTo[8], aes_uint8_t(SBOX_INV(stateFrom[8]) ^ roundKey[8]), aes_uint8_t(SBOX_INV(stateFrom[5]) ^ roundKey[9]), aes_uint8_t(SBOX_INV(stateFrom[2]) ^ roundKey[10]), aes_uint8_t(SBOX_INV(stateFrom[15]) ^ roundKey[11])); | |
InvMixColumn(&stateTo[12], aes_uint8_t(SBOX_INV(stateFrom[12]) ^ roundKey[12]), aes_uint8_t(SBOX_INV(stateFrom[9]) ^ roundKey[13]), aes_uint8_t(SBOX_INV(stateFrom[6]) ^ roundKey[14]), aes_uint8_t(SBOX_INV(stateFrom[3]) ^ roundKey[15])); | |
} | |
static inline void DecryptRoundLast(aes_uint8_t* restrict_ptr stateTo, const aes_uint8_t* restrict_ptr stateFrom, const aes_uint8_t* restrict_ptr roundKey) noexcept | |
{ | |
stateTo[0] = aes_uint8_t(SBOX_INV(stateFrom[0]) ^ roundKey[0]); | |
stateTo[1] = aes_uint8_t(SBOX_INV(stateFrom[13]) ^ roundKey[1]); | |
stateTo[2] = aes_uint8_t(SBOX_INV(stateFrom[10]) ^ roundKey[2]); | |
stateTo[3] = aes_uint8_t(SBOX_INV(stateFrom[7]) ^ roundKey[3]); | |
stateTo[4] = aes_uint8_t(SBOX_INV(stateFrom[4]) ^ roundKey[4]); | |
stateTo[5] = aes_uint8_t(SBOX_INV(stateFrom[1]) ^ roundKey[5]); | |
stateTo[6] = aes_uint8_t(SBOX_INV(stateFrom[14]) ^ roundKey[6]); | |
stateTo[7] = aes_uint8_t(SBOX_INV(stateFrom[11]) ^ roundKey[7]); | |
stateTo[8] = aes_uint8_t(SBOX_INV(stateFrom[8]) ^ roundKey[8]); | |
stateTo[9] = aes_uint8_t(SBOX_INV(stateFrom[5]) ^ roundKey[9]); | |
stateTo[10] = aes_uint8_t(SBOX_INV(stateFrom[2]) ^ roundKey[10]); | |
stateTo[11] = aes_uint8_t(SBOX_INV(stateFrom[15]) ^ roundKey[11]); | |
stateTo[12] = aes_uint8_t(SBOX_INV(stateFrom[12]) ^ roundKey[12]); | |
stateTo[13] = aes_uint8_t(SBOX_INV(stateFrom[9]) ^ roundKey[13]); | |
stateTo[14] = aes_uint8_t(SBOX_INV(stateFrom[6]) ^ roundKey[14]); | |
stateTo[15] = aes_uint8_t(SBOX_INV(stateFrom[3]) ^ roundKey[15]); | |
} | |
public: | |
AES(const aes_uint8_t* key, unsigned length) noexcept | |
{ | |
KeyExpansion(m_roundKey, key, length); | |
} | |
ENABLE_AES_NI_ATTRIBUTE void encrypt(const aes_uint8_t text[Nb * 4], aes_uint8_t cipper[Nb * 4]) noexcept | |
{ | |
#ifdef OPT_AES_NI | |
if (haveAesNi()) | |
{ | |
__m128i s = _mm_loadu_si128((__m128i *)(void *)(text)); | |
s = _mm_xor_si128(s, _mm_loadu_si128((__m128i *)(void *)(m_roundKey))); | |
for (unsigned i = 1; i < Nr; i++) | |
{ | |
s = _mm_aesenc_si128(s, _mm_loadu_si128((__m128i *)(void *)(&m_roundKey[i * Nb * 4]))); | |
} | |
s = _mm_aesenclast_si128(s, _mm_loadu_si128((__m128i *)(void *)(&m_roundKey[Nr * Nb * 4]))); | |
_mm_storeu_si128((__m128i *)(void *)cipper, s); | |
} | |
else | |
#endif | |
{ | |
aes_uint8_t state[2][Nb * 4]; | |
int stateNum = 0; | |
for (unsigned i = 0; i < Nb * 4; i++) | |
state[stateNum][i] = aes_uint8_t(text[i] ^ m_roundKey[i]); // AddRoundKey | |
for (unsigned i = 1; i < Nr; i++) | |
{ | |
EncryptRound(state[1 - stateNum], state[stateNum], &m_roundKey[i * Nb * 4]); | |
stateNum = 1 - stateNum; | |
} | |
EncryptRoundLast(cipper, state[stateNum], &m_roundKey[Nr * Nb * 4]); | |
} | |
} | |
ENABLE_AES_NI_ATTRIBUTE void decrypt(const aes_uint8_t cipper[Nb * 4], aes_uint8_t text[Nb * 4]) noexcept | |
{ | |
#ifdef OPT_AES_NI | |
if (haveAesNi()) | |
{ | |
__m128i s = _mm_loadu_si128((__m128i *)(void *)(cipper)); | |
s = _mm_xor_si128(s, _mm_loadu_si128((__m128i *)(void *)(&m_roundKey[Nr * Nb * 4]))); | |
for (unsigned i = Nr - 1; i > 0; i--) | |
{ | |
s = _mm_aesdec_si128(s, _mm_aesimc_si128(_mm_loadu_si128((__m128i *)(void *)(&m_roundKey[i * Nb * 4])))); | |
} | |
s = _mm_aesdeclast_si128(s, _mm_loadu_si128((__m128i *)(void *)(m_roundKey))); | |
_mm_storeu_si128((__m128i *)(void *)text, s); | |
} | |
else | |
#endif | |
{ | |
aes_uint8_t state[2][Nb * 4]; | |
int stateNum = 0; | |
for (unsigned i = 0; i < Nb * 4; i++) | |
state[stateNum][i] = aes_uint8_t(cipper[i] ^ m_roundKey[Nr * Nb * 4 + i]); // AddRoundKey | |
for (unsigned i = Nr - 1; i > 0; i--) | |
{ | |
DecryptRound(state[1 - stateNum], state[stateNum], &m_roundKey[i * Nb * 4]); | |
stateNum = 1 - stateNum; | |
} | |
DecryptRoundLast(text, state[stateNum], m_roundKey); | |
} | |
} | |
}; | |
// Ready-made instantiations: <key length in 32-bit words, number of rounds>.
using AES128 = AES<4, 10>;
using AES192 = AES<6, 12>;
using AES256 = AES<8, 14>;
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment