Skip to content

Instantly share code, notes, and snippets.

@alula
Created February 14, 2024 12:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save alula/2504f5cadc3600c3b29ce8f9d9da1b46 to your computer and use it in GitHub Desktop.
Save alula/2504f5cadc3600c3b29ce8f9d9da1b46 to your computer and use it in GitHub Desktop.
AES bruteforcer (incrementer version)
// Original non-working mess fixed by alula
//
// The AES-NI code seems to be taken from https://gist.github.com/acapola/d5b940da024080dfaf5f
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>

#include <smmintrin.h>
#include <wmmintrin.h>
// Default number of worker threads; main uses it when no count is given on the command line.
#define NUM_THREADS 8
// Requests 16-byte alignment for the declaration it annotates (GCC/Clang attribute syntax).
#define ALIGN16 __attribute__((aligned(16)))
// 128-bit unsigned integer used to do arithmetic on a whole key at once.
typedef unsigned __int128 u128; // GCC & Clang with extensions only
// One 128-bit block (used in this file for keys, plaintext, ciphertext and
// round keys) that can be viewed either as a single SSE vector or as 16 bytes.
typedef union key_data
{
__m128i m; // whole-block view consumed by the SIMD / AES-NI intrinsics
uint8_t c[16]; // byte view; c[0] is the lowest-addressed byte
} key_data;
// Expanded AES-128 key: keys[0] holds the cipher key itself and
// keys[1]..keys[10] the ten derived round keys (filled in by aes128_load_key).
typedef struct key_schedule
{
key_data keys[11];
} key_schedule;
/* Reverse the order of the 16 bytes held in a 128-bit vector. */
static inline __m128i bswap128(__m128i n)
{
    /* Shuffle control: destination byte i is taken from source byte 15 - i. */
    const __m128i reverse_bytes =
        _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

    return _mm_shuffle_epi8(n, reverse_bytes);
}
/*
 * Add `inc` to the key, treating its 16 bytes as one big-endian 128-bit
 * integer (c[15] is the least significant byte). The addition wraps modulo
 * 2^128.
 */
static inline void inc_key_data(key_data *key, uint32_t inc)
{
    key_data scratch;
    u128 value;

    /* Byte-reverse so the key can be read as a native (little-endian) u128. */
    scratch.m = bswap128(key->m);
    memcpy(&value, scratch.c, sizeof value);

    value += inc;

    /* Store the sum and flip it back into the key's big-endian byte order. */
    memcpy(scratch.c, &value, sizeof value);
    key->m = bswap128(scratch.m);
}
/*
 * Derive the next AES-128 round key from round key `k`.
 * This has to be a macro: _mm_aeskeygenassist_si128 takes its round constant
 * as an immediate operand, so `rcon` must be a compile-time constant.
 * Note that `k` is evaluated twice.
 */
#define AES_128_key_exp(k, rcon) aes_128_key_expansion((k), _mm_aeskeygenassist_si128((k), (rcon)))

/*
 * Combine the previous round key with the key-generation-assist result.
 *
 * key:      previous round key, four 32-bit words w0..w3
 * keygened: output of _mm_aeskeygenassist_si128 for `key`; only its top
 *           32-bit word is used
 *
 * Returns the words (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3), each additionally
 * XORed with the top word of `keygened`.
 */
static inline __m128i aes_128_key_expansion(__m128i key, __m128i keygened)
{
    /* Running XOR of the words: key ^ (key << 32) ^ (key << 64) ^ (key << 96). */
    const __m128i by_one_word = _mm_slli_si128(key, 4);
    const __m128i by_two_words = _mm_slli_si128(key, 8);
    const __m128i by_three_words = _mm_slli_si128(key, 12);
    const __m128i prefix_xor = _mm_xor_si128(_mm_xor_si128(key, by_one_word),
                                             _mm_xor_si128(by_two_words, by_three_words));

    /* Broadcast the top word of the assist result to all four lanes. */
    const __m128i round_word = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3, 3, 3, 3));

    return _mm_xor_si128(prefix_xor, round_word);
}
/*
 * Expand a 128-bit cipher key into the full AES-128 key schedule.
 *
 * key_schedule: receives the cipher key in keys[0] and the ten round keys in
 *               keys[1]..keys[10]
 * enc_key:      the cipher key to expand
 */
static inline void aes128_load_key(key_schedule *key_schedule, key_data enc_key)
{
    key_data *const rk = key_schedule->keys;

    rk[0].m = _mm_loadu_si128(&enc_key.m);

    /* Each round key is derived from the previous one; the second argument
       is the round constant, which must be a literal (see AES_128_key_exp). */
    rk[1].m = AES_128_key_exp(rk[0].m, 0x01);
    rk[2].m = AES_128_key_exp(rk[1].m, 0x02);
    rk[3].m = AES_128_key_exp(rk[2].m, 0x04);
    rk[4].m = AES_128_key_exp(rk[3].m, 0x08);
    rk[5].m = AES_128_key_exp(rk[4].m, 0x10);
    rk[6].m = AES_128_key_exp(rk[5].m, 0x20);
    rk[7].m = AES_128_key_exp(rk[6].m, 0x40);
    rk[8].m = AES_128_key_exp(rk[7].m, 0x80);
    rk[9].m = AES_128_key_exp(rk[8].m, 0x1B);
    rk[10].m = AES_128_key_exp(rk[9].m, 0x36);
}
/*
 * Encrypt a single 16-byte block with AES-128.
 *
 * key_schedule:  expanded key produced by aes128_load_key
 * plainText:     block to encrypt
 * outCipherText: receives the encrypted block
 */
static inline void aes128_enc(const key_schedule *key_schedule, key_data plainText, key_data *outCipherText)
{
    const key_data *const rk = key_schedule->keys;

    /* Initial AddRoundKey with the cipher key itself. */
    __m128i state = _mm_xor_si128(_mm_loadu_si128(&plainText.m), rk[0].m);

    /* Nine full rounds with round keys 1..9 ... */
    for (int round = 1; round <= 9; round++)
    {
        state = _mm_aesenc_si128(state, rk[round].m);
    }

    /* ... followed by the final round with round key 10. */
    state = _mm_aesenclast_si128(state, rk[10].m);

    _mm_storeu_si128(&outCipherText->m, state);
}
/* Return true when the two 128-bit blocks are identical in all 16 bytes. */
static inline bool key_data_equal(key_data k1, key_data k2)
{
    /* Each byte lane becomes 0xFF where the inputs match and 0x00 where they
       differ; movemask then packs the top bit of every lane into bits 0..15. */
    const __m128i lanes_equal = _mm_cmpeq_epi8(k1.m, k2.m);
    const int equal_mask = _mm_movemask_epi8(lanes_equal);

    /* All 16 bits set means every byte matched. (An SSE4.1 alternative is to
       XOR the operands and test the result with _mm_testz_si128.) */
    return equal_mask == 0xffff;
}
/*
 * Print the 16 bytes of `data` to stdout as two-digit lowercase hex values
 * separated by single spaces and terminated by a newline.
 */
void hexdump(const key_data *data)
{
    /* Three characters per byte ("xx" plus a separator) and a trailing NUL. */
    char line[sizeof data->c * 3 + 1];
    char *cursor = line;

    for (size_t i = 0; i < sizeof data->c; i++)
    {
        const char separator = (i + 1 < sizeof data->c) ? ' ' : '\n';
        cursor += snprintf(cursor, sizeof line - (size_t)(cursor - line),
                           "%02x%c", data->c[i], separator);
    }

    /* Emit the whole line with one stdio call so that lines printed by
       different threads cannot interleave mid-line. */
    fputs(line, stdout);
}
// Per-thread work description that main passes to crack_thread through pthread_create.
typedef struct thread_data
{
uint32_t thread_id; // index of this worker; crack_thread also adds it to the seed as the starting offset
uint32_t increment; // amount added to the key after every attempt (main sets it to the thread count)
key_data seed; // starting key, identical for all workers
} thread_data;
// Not referenced by any code visible in this file.
#define KEYS_COUNT 1024
void *crack_thread(void *threadarg)
{
thread_data *my_data = (thread_data *)threadarg;
printf("Thread ID: %d\n", my_data->thread_id);
const key_data cipher = {.c = {0x8B, 0x66, 0x68, 0xC2, 0x7D, 0x22, 0x61, 0x05, 0xA9, 0x17, 0xD6, 0x61, 0x41, 0xBC, 0x7B, 0x67}};
const key_data plain = {.c = {0xC4, 0x93, 0xE8, 0x4A, 0xAD, 0xD1, 0xC3, 0x03, 0x91, 0x3A, 0xBD, 0x57, 0xFE, 0x09, 0x79, 0x36}};
key_schedule key_schedule;
key_data computed_cipher;
key_data enc_key = my_data->seed;
uint32_t increment = my_data->increment;
inc_key_data(&enc_key, my_data->thread_id);
for (;;)
{
// printf("Trying key: \n");
// hexdump(&enc_key);
aes128_load_key(&key_schedule, enc_key);
aes128_enc(&key_schedule, plain, &computed_cipher);
if (key_data_equal(cipher, computed_cipher))
{
hexdump(&cipher);
hexdump(&computed_cipher);
printf("cipher match, key is\n");
hexdump(&enc_key);
goto end;
}
inc_key_data(&enc_key, increment);
}
end:
pthread_exit(NULL);
return NULL;
}
/* Map one ASCII hex digit (either case) to its value 0..15, or -1 if `ch` is not a hex digit. */
static int seed_hex_digit(char ch)
{
    if (ch >= '0' && ch <= '9')
    {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f')
    {
        return ch - 'a' + 10;
    }
    if (ch >= 'A' && ch <= 'F')
    {
        return ch - 'A' + 10;
    }
    return -1;
}

/*
 * Parse a 128-bit seed given as exactly 32 hexadecimal characters.
 *
 * seed_str: NUL-terminated string; upper- and lowercase digits are accepted.
 *           The first two characters become byte 0 of the seed.
 * out_seed: receives the 16 decoded bytes; left untouched on failure.
 *
 * Returns true on success, false if either pointer is NULL, the length is
 * not 32, or a non-hex character is found.
 */
bool read_seed(const char *seed_str, key_data *out_seed)
{
    if (seed_str == NULL || out_seed == NULL)
    {
        return false;
    }
    if (strlen(seed_str) != 2 * sizeof out_seed->c)
    {
        return false;
    }

    /* Decode into a temporary so a late parse error cannot leave a
       half-written seed behind. */
    uint8_t decoded[sizeof out_seed->c];
    for (size_t i = 0; i < sizeof decoded; i++)
    {
        const int high = seed_hex_digit(seed_str[i * 2]);
        const int low = seed_hex_digit(seed_str[i * 2 + 1]);
        if (high < 0 || low < 0)
        {
            return false;
        }
        decoded[i] = (uint8_t)((high << 4) | low);
    }

    memcpy(out_seed->c, decoded, sizeof decoded);
    return true;
}
/*
 * Program entry point.
 *
 * Usage: <program> <starting seed> [num_threads]
 *   starting seed: 32 hex characters (128 bits)
 *   num_threads:   positive decimal integer, defaults to NUM_THREADS
 *
 * Starts num_threads workers running crack_thread. Returns 1 on invalid
 * arguments or allocation failure; exits with status -1 if a thread cannot
 * be created. On success the main thread ends via pthread_exit so that the
 * workers keep running.
 */
int main(int argc, char *argv[])
{
    int num_threads = NUM_THREADS;
    if (argc < 2)
    {
        /* argv[0] may be missing when argc is 0. */
        const char *program = (argc > 0 && argv[0] != NULL) ? argv[0] : "brute-inc";
        printf("Usage: %s <starting seed> [num_threads]\n", program);
        printf(" starting seed: 128-bit hex string\n");
        printf(" num_threads: number of threads to use (default: %d)\n", NUM_THREADS);
        return 1;
    }

    key_data seed;
    if (!read_seed(argv[1], &seed))
    {
        printf("Invalid seed.\n");
        return 1;
    }

    if (argc > 2)
    {
        /* strtol instead of atoi: detects trailing garbage and overflow. */
        char *end = NULL;
        errno = 0;
        const long requested = strtol(argv[2], &end, 10);
        if (errno != 0 || end == argv[2] || *end != '\0' || requested <= 0 || requested > INT_MAX)
        {
            printf("Invalid number of threads.\n");
            return 1;
        }
        num_threads = (int)requested;
    }
    if (num_threads <= 0)
    {
        printf("Invalid number of threads.\n");
        return 1;
    }
    printf("Using %d threads\n", num_threads);

    pthread_t *threads = calloc((size_t)num_threads, sizeof *threads);
    thread_data *td = calloc((size_t)num_threads, sizeof *td);
    if (threads == NULL || td == NULL)
    {
        printf("Error: out of memory\n");
        free(threads);
        free(td);
        return 1;
    }

    for (int i = 0; i < num_threads; i++)
    {
        /* Worker i starts at seed + i and steps by the thread count. */
        td[i].increment = (uint32_t)num_threads;
        td[i].thread_id = (uint32_t)i;
        td[i].seed = seed;
        int rc = pthread_create(&threads[i], NULL, crack_thread, (void *)&td[i]);
        if (rc)
        {
            printf("Error: unable to create thread %d\n", rc);
            exit(-1);
        }
    }

    /* End only the main thread. The workers still read from td, so neither
       array is freed here; the memory is reclaimed when the process ends. */
    pthread_exit(NULL);
    return 0;
}
# The Clang compiler is recommended. It does a much better job of understanding and optimizing SIMD code.
# `all` and `clean` name actions, not files; declare them phony so a stray
# file with either name cannot make the rule look up to date.
.PHONY: all clean

# Build the brute-forcer from brute-inc.c with the flags that enable the
# instruction-set extensions used by the intrinsics.
all:
	clang brute-inc.c -o brute-inc -O3 -Wall -lpthread -maes -march=native -msse2 -msse -msse4.1

# -f keeps `make clean` from failing when the binary has not been built yet.
clean:
	rm -f brute-inc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment