Skip to content

Instantly share code, notes, and snippets.

@aqrit
Created September 18, 2017 00:47
Show Gist options
  • Save aqrit/a2ccea48d7cac7e9d4d99f19d4759666 to your computer and use it in GitHub Desktop.
Save aqrit/a2ccea48d7cac7e9d4d99f19d4759666 to your computer and use it in GitHub Desktop.
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <intrin.h>
#include "smmintrin.h"
/*
 * Scalar model of the per-byte hash used by the SIMD decoder.
 *
 * lut: 16-entry association table, indexed with the low nibble of c.
 * c:   input byte value.
 *
 * Returns the rounded-up average of (0xE0 | (c >> 3)) and the table entry,
 * where the table entry is replaced by zero when bit 7 of c is set.
 */
size_t hash(unsigned char* lut, size_t c){
    // psrld -- the 0xE0 stands in for the three bits that the wide shift
    // pulls in from the next byte.
    const size_t shifted = (c >> 3) | 0xE0;
    // pshufb -- a selector with its high bit set produces zero, not a lookup.
    const size_t assoc = (c & 0x80) ? 0x00 : lut[c & 0x0F];
    // pavg -- average of the two, rounding up.
    return (shifted + assoc + 1) >> 1;
}
// Signed saturating 8-bit add. Rather than emulating the instruction in
// scalar code, this runs the real SSE intrinsic on the two operands.
unsigned char adds8(unsigned char a, unsigned char b){
    // Each operand is placed in the lowest byte of its own vector.
    const __m128i sum = _mm_adds_epi8(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b));
    // The conversion to unsigned char keeps only the lowest byte of the result.
    return (unsigned char)_mm_cvtsi128_si32(sum);
}
// The 64 characters accepted by the decoder, in ascending ASCII order.
// decoded_chars holds the expected 6-bit value for each entry.
unsigned char valid_chars[64] = {
    '+', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
};
// Expected 6-bit value for each entry of valid_chars, position for position.
unsigned char decoded_chars[64] = {
    /* '+' '/'  */ 62, 63,
    /* '0'..'9' */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
    /* 'A'..'M' */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
    /* 'N'..'Z' */ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
    /* 'a'..'m' */ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
    /* 'n'..'z' */ 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};
// The 64 byte values below 0x80 that are not in valid_chars.
// Bytes with the high bit set (0x80..0xFF) are invalid as well; they are not
// listed here.
unsigned char invalid_chars[64] = {
    /* 0x00..0x2A */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A,
    /* 0x2C..0x2E */
    0x2C, 0x2D, 0x2E,
    /* 0x3A..0x40 */
    0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40,
    /* 0x5B..0x60 */
    0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60,
    /* 0x7B..0x7F */
    0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
// Association table for the decode hash. hash() indexes it with the low
// nibble of the input byte; the low nibble of the resulting hash then
// selects an entry of delta_values.
unsigned char delta_asso[16] = {
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F
};
// Offsets selected by the low nibble of hash(delta_asso, c). check_decode()
// adds the selected entry to c with a signed saturating add and expects the
// 6-bit value of c (and 0 for '=', 0x3D).
unsigned char delta_values[16] = {
0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9
};
// Association table for the validation hash. hash() indexes it with the low
// nibble of the input byte; the low nibble of the resulting hash then
// selects an entry of check_values.
unsigned char check_asso[16] = {
0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F
};
// Values selected by the low nibble of hash(check_asso, c) and added to c
// with a signed saturating add. check_invalid_char_detection() expects the
// sum to have bit 7 clear for valid characters and set for invalid ones.
// Every entry has bit 7 set (this is also verified there).
unsigned char check_values[16] = {
0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6,
0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80
};
// Checks the delta tables: every entry of valid_chars must map to the
// matching entry of decoded_chars, and '=' must map to zero.
// Prints one FAIL line per mismatch; prints nothing on success.
void check_decode(){
    size_t idx = 0;
    while( idx < 64 ){
        const unsigned char ch = valid_chars[idx];
        const size_t slot = hash(delta_asso, ch) & 0x0F;
        const unsigned char got = adds8(delta_values[slot], ch);
        if( got != decoded_chars[idx] ){
            printf("FAIL: %02X decoded to %02X \n",ch,got);
        }
        idx++;
    }

    // The equal sign (0x3D) is made to decode to zero, because why not?
    const size_t pad_slot = hash(delta_asso, 0x3D) & 0x0F;
    const unsigned char pad = adds8(delta_values[pad_slot], 0x3D);
    if( pad != 0 ){
        printf("FAIL: %02X decoded to %02X \n",0x3D,pad);
    }
}
// Checks the validation tables. After the saturating add of the selected
// check_values entry, bit 7 of the sum must be clear for every entry of
// valid_chars and set for every entry of invalid_chars and for every byte
// in 0x80..0xFF. Prints one FAIL line per violation.
void check_invalid_char_detection()
{
    size_t n;

    // Valid characters: bit 7 of the sum must be clear.
    for( n = 0; n != 64; ++n ){
        const unsigned char ch = valid_chars[n];
        const unsigned char hv = hash(check_asso, ch);
        const unsigned char sum = adds8(check_values[hv & 0x0F], ch);
        if( (sum & 0x80) != 0 ){
            printf("FAIL: valid char 0x%02X not detected\n",ch);
        }
    }

    // Listed invalid characters: bit 7 of the sum must be set.
    for( n = 0; n != 64; ++n ){
        const unsigned char ch = invalid_chars[n];
        const unsigned char hv = hash(check_asso, ch);
        const unsigned char sum = adds8(check_values[hv & 0x0F], ch);
        if( (sum & 0x80) == 0 ){
            printf("FAIL: invalid char 0x%02X not detected\n",ch);
            printf( "%02X %02X %02X\n", hv, check_values[hv & 0x0F], sum );
        }
    }

    // note:
    // every check_value must be signed (bit 7 set)...
    // with a saturating signed add: signed + signed = signed
    // for signed input the hash may overflow,
    // in which case the sum is 0 + signed = signed
    for( int k = 0; k != 16; ++k ){
        const unsigned char entry = check_values[k];
        if( !(entry & 0x80) ){
            printf("FAIL: check_value %02X is unsigned\n", entry);
        }
    }

    // Bytes with the high bit set: bit 7 of the sum must be set.
    for( n = 128; n != 256; ++n ){
        const unsigned char ch = n;
        const unsigned char hv = hash(check_asso, ch);
        const unsigned char sum = adds8(check_values[hv & 0x0F], ch);
        if( (sum & 0x80) == 0 ){
            printf("FAIL: invalid char 0x%02X not detected\n",ch);
        }
    }
}
// hash must produce unsigned results (bit 7 clear) for unsigned input
// (bytes 0x00..0x7F). Checked for both association tables; prints one FAIL
// line for every input/table pair that violates this.
void check_unsigned_hash(){
    unsigned char* const tables[2] = { check_asso, delta_asso };

    for( size_t i = 0; i < 128; i++ ){
        for( size_t t = 0; t < 2; t++ ){
            unsigned char h = hash(tables[t], i);
            if(h >= 0x80){
                // i is a size_t; %X requires an unsigned int, so convert
                // explicitly instead of passing a mismatched argument.
                printf("FAIL: %02X hashes to %02X\n",(unsigned)i,h);
            }
        }
    }
}
// Prints the low nibble of hash(lut, c) for c = 0x00..0x7F as hex digits,
// sixteen per line, preceded by a blank line break and followed by a newline.
void print_hash( unsigned char* lut ){
    for( size_t row = 0; row < 8; row++ ){
        printf("\n");
        for( size_t col = 0; col < 16; col++ ){
            const unsigned char ch = (unsigned char)(row * 16 + col);
            const unsigned char hv = hash(lut, ch);
            printf( "%01X ", hv & 0x0F);
        }
    }
    printf("\n");
}
// Sample input for the decoder: base64-encoded text (it begins with
// "Call me Ishmael."), stored as one NUL-terminated string and ending in
// "==" padding. main() decodes it and prints the result.
static const char moby_dick_base64[] =
"Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ"
"yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG"
"FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh"
"vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5"
"IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ"
"mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm"
"VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV"
"2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l"
"dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma"
"W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm"
"VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V"
"jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v"
"cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa"
"W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG"
"UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8"
"gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz"
"dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc"
"m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2"
"hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV"
"0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l"
"IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd"
"2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==";
bool base64_decode_ssse3( void* dst_void, void* src_void, size_t length )
{
unsigned char* src = (unsigned char*)src_void;
unsigned char* dst = (unsigned char*)dst_void;
const __m128i delta_asso = _mm_setr_epi8(
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F
);
const __m128i delta_values = _mm_setr_epi8(
0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9
);
const __m128i check_asso = _mm_setr_epi8(
0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F
);
const __m128i check_values = _mm_setr_epi8(
0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6,
0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80
);
for( unsigned char* end = &src[(length & ~15)]; src != end; src += 16){
__m128i asrc, shifted, delta_hash, check_hash, out, chk;
int mask;
asrc = _mm_loadu_si128((__m128i *)src);
shifted = _mm_srli_epi32(asrc, 3);
delta_hash = _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, asrc), shifted);
check_hash = _mm_avg_epu8(_mm_shuffle_epi8(check_asso, asrc), shifted);
out = _mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), asrc);
chk = _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), asrc);
mask = _mm_movemask_epi8(chk);
if(mask != 0){
break;
}
const __m128i pack_shuffle = _mm_setr_epi8(
2, 1, 0, 6, 5, 4, 10, 9,
8, 14, 13, 12, -1, -1, -1, -1);
out = _mm_maddubs_epi16(out, _mm_set1_epi32(0x01400140));
out = _mm_madd_epi16(out, _mm_set1_epi32(0x00011000));
out = _mm_shuffle_epi8(out, pack_shuffle);
_mm_storeu_si128((__m128i *)dst, out);
dst += 12;
}
return true;
}
// Test driver: prints both hash tables, runs the table self-checks, then
// decodes the sample text and prints it.
int main ()
{
    print_hash( check_asso );
    print_hash( delta_asso );
    check_unsigned_hash();
    check_decode();
    check_invalid_char_detection();

    // Zero-filled so the decoded bytes are NUL-terminated for printf.
    static unsigned char out[0x4000];
    memset(out,0,sizeof(out));

    // Pass the real text length (without the terminating NUL) rather than
    // -1: a length of (size_t)-1 makes the decoder compute an end pointer far
    // outside the array and read past the end of the string before the NUL
    // stops it. The decoder only processes whole 16-byte blocks, so the
    // decoded output is the same.
    base64_decode_ssse3(out, (void*)moby_dick_base64, sizeof(moby_dick_base64) - 1);
    printf( "\n\n%s", out);

    printf("\npress enter to exit\n");
    getchar();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment