Created
March 23, 2018 12:17
-
-
Save bokibo/42abc1e33a6210d9ccfc5a96be728c68 to your computer and use it in GitHub Desktop.
Kernel for AES in CUDA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
// AES forward substitution box laid out as a 16x16 grid.
// Sbox[r][c] holds the same value as entry r * 16 + c of the flat table.
const unsigned char Sbox[16][16] = {
    /* 0 */ { 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76 },
    /* 1 */ { 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0 },
    /* 2 */ { 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15 },
    /* 3 */ { 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75 },
    /* 4 */ { 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84 },
    /* 5 */ { 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF },
    /* 6 */ { 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8 },
    /* 7 */ { 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2 },
    /* 8 */ { 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73 },
    /* 9 */ { 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB },
    /* A */ { 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79 },
    /* B */ { 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08 },
    /* C */ { 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A },
    /* D */ { 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E },
    /* E */ { 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF },
    /* F */ { 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 }
};
// AES inverse substitution box laid out as a 16x16 grid.
// InvSbox[r][c] holds the same value as entry r * 16 + c of the flat table.
const unsigned char InvSbox[16][16] = {
    /* 0 */ { 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB },
    /* 1 */ { 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB },
    /* 2 */ { 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E },
    /* 3 */ { 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25 },
    /* 4 */ { 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92 },
    /* 5 */ { 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84 },
    /* 6 */ { 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06 },
    /* 7 */ { 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B },
    /* 8 */ { 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73 },
    /* 9 */ { 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E },
    /* A */ { 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B },
    /* B */ { 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4 },
    /* C */ { 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F },
    /* D */ { 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF },
    /* E */ { 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61 },
    /* F */ { 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D }
};
// Round constants for the key schedule, one column per round (index round - 1).
// Only row 0 is non-zero; rows 1..3 are zero so a whole column can be XORed in.
const unsigned char Rcon[4][10] = {
    /* row 0 */ { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 },
    /* row 1 */ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    /* row 2 */ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    /* row 3 */ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
};
// AES forward substitution box as a flat 256-entry lookup: Sbox1[b] replaces byte b.
const unsigned char Sbox1[256] = {
    /* 0x00 */ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    /* 0x10 */ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    /* 0x20 */ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    /* 0x30 */ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    /* 0x40 */ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    /* 0x50 */ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    /* 0x60 */ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    /* 0x70 */ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    /* 0x80 */ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    /* 0x90 */ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    /* 0xA0 */ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    /* 0xB0 */ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    /* 0xC0 */ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    /* 0xD0 */ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    /* 0xE0 */ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    /* 0xF0 */ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};
// AES inverse substitution box as a flat 256-entry lookup: InvSbox1[b] undoes the forward box.
const unsigned char InvSbox1[256] = {
    /* 0x00 */ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
    /* 0x10 */ 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
    /* 0x20 */ 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
    /* 0x30 */ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
    /* 0x40 */ 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
    /* 0x50 */ 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
    /* 0x60 */ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
    /* 0x70 */ 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
    /* 0x80 */ 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
    /* 0x90 */ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
    /* 0xA0 */ 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
    /* 0xB0 */ 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
    /* 0xC0 */ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
    /* 0xD0 */ 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
    /* 0xE0 */ 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
    /* 0xF0 */ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "cuda_runtime.h" | |
#include "device_launch_parameters.h" | |
#include "consts.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
// GF(2^8) multiplication macros used by the inverse MixColumns step
// (reduction constant 0x1b).
//   xtime(x)      - x times 2, reduced and masked to one byte.
//   Multiply(x,y) - x times coefficient y; only bits 0..4 of y are honoured.
// Arguments are fully parenthesized, but they are evaluated several times:
// pass side-effect-free expressions only.
#define xtime(x) (((((x)) << 1) ^ (((((x)) >> 7) & 1) * 0x1b)) & 0xff)
#define Multiply(x,y) ((((((y)) & 1) * (x)) ^ (((((y)) >> 1) & 1) * xtime(x)) ^ (((((y)) >> 2) & 1) * xtime(xtime(x))) ^ (((((y)) >> 3) & 1) * xtime(xtime(xtime(x)))) ^ (((((y)) >> 4) & 1) * xtime(xtime(xtime(xtime(x)))))) & 0xff)
using namespace std; | |
// Device-side copy of the AES forward substitution box in constant memory;
// the encrypt kernel indexes it with the state byte being substituted.
__constant__ unsigned char Sbox_dev[256] = {
    /* 0x00 */ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    /* 0x10 */ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    /* 0x20 */ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    /* 0x30 */ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    /* 0x40 */ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    /* 0x50 */ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    /* 0x60 */ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    /* 0x70 */ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    /* 0x80 */ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    /* 0x90 */ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    /* 0xA0 */ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    /* 0xB0 */ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    /* 0xC0 */ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    /* 0xD0 */ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    /* 0xE0 */ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    /* 0xF0 */ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};
// Device-side copy of the AES inverse substitution box in constant memory;
// the decrypt kernel indexes it with the state byte being substituted back.
__constant__ unsigned char InvSbox_dev[256] = {
    /* 0x00 */ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
    /* 0x10 */ 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
    /* 0x20 */ 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
    /* 0x30 */ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
    /* 0x40 */ 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
    /* 0x50 */ 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
    /* 0x60 */ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
    /* 0x70 */ 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
    /* 0x80 */ 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
    /* 0x90 */ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
    /* 0xA0 */ 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
    /* 0xB0 */ 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
    /* 0xC0 */ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
    /* 0xD0 */ 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
    /* 0xE0 */ 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
    /* 0xF0 */ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
};
// Coefficient matrix for the forward MixColumns step, kept in constant memory.
// Row i lists the four multipliers applied to a state column to produce its i-th byte.
__constant__ unsigned char MixCol_dev[4][4] = {
    /* row 0 */ { 0x02, 0x03, 0x01, 0x01 },
    /* row 1 */ { 0x01, 0x02, 0x03, 0x01 },
    /* row 2 */ { 0x01, 0x01, 0x02, 0x03 },
    /* row 3 */ { 0x03, 0x01, 0x01, 0x02 }
};
// Coefficient matrix for the inverse MixColumns step, kept in constant memory.
// Row i lists the four multipliers applied to a state column to produce its i-th byte.
__constant__ unsigned char InvMixCol_dev[4][4] = {
    /* row 0 */ { 0x0e, 0x0b, 0x0d, 0x09 },
    /* row 1 */ { 0x09, 0x0e, 0x0b, 0x0d },
    /* row 2 */ { 0x0d, 0x09, 0x0e, 0x0b },
    /* row 3 */ { 0x0b, 0x0d, 0x09, 0x0e }
};
// One 16-byte AES state / round key, addressed as item[row][column].
typedef struct
{
    unsigned char item[4][4];
}
Block;
// Returns the size of `filename` in bytes.
// Returns 0 when the file cannot be opened or its size cannot be determined,
// so callers never see the -1 that ftell reports on failure.
__host__ long file_length(const char* filename) {
    // Binary mode: the end-of-file offset is then the exact byte count.
    FILE * f = fopen(filename, "rb");
    if (!f)
        return 0;

    long length = 0;
    if (fseek(f, 0, SEEK_END) == 0) {
        length = ftell(f);
        if (length < 0)
            length = 0;
    }
    fclose(f);
    return length;
}
// Reads the whole file `filename` into a freshly allocated, NUL-terminated buffer.
// Returns NULL when the file cannot be opened, measured or allocated.
// The caller owns the buffer and releases it with free().
__host__ char * readSource(char* filename) {
    char * source = NULL;   // stays NULL on every failure path
    FILE * file = fopen(filename, "r");
    if (!file)
        return NULL;

    if (fseek(file, 0, SEEK_END) == 0) {
        long length = ftell(file);
        if (length >= 0 && fseek(file, 0, SEEK_SET) == 0) {
            // One extra zeroed byte keeps the result usable as a C string.
            source = (char*)calloc((size_t)length + 1, sizeof(char));
            if (source) {
                size_t got = fread(source, 1, (size_t)length, file);
                source[got] = '\0';
            }
        }
    }
    fclose(file);
    return source;
}
// Loads up to `source_length` bytes of file `source` into `num_of_blocks` blocks.
// Byte p of a block (p = 0..15) is stored at item[p % 4][p / 4], i.e. the
// file fills each block column by column.
// Bytes that are not read (short file, tail of the last block) stay zero.
// Returns a calloc'ed array the caller releases with free(), or NULL when
// num_of_blocks <= 0, the file cannot be opened or the allocation fails.
__host__ Block * plaintext_initialization(const char * source, long source_length, int num_of_blocks) {
    if (num_of_blocks <= 0)
        return NULL;

    // Binary mode so the byte count matches the length measured by the caller.
    FILE * file = fopen(source, "rb");
    if (!file) {
        fprintf(stderr, "Error opening file %s \n", source);
        return NULL;
    }

    Block * plaintext = (Block *)calloc((size_t)num_of_blocks, sizeof(Block));
    if (!plaintext) {
        fclose(file);
        return NULL;
    }

    // Never write past the allocated blocks, whatever length the caller passes.
    const long capacity = (long)num_of_blocks * 16;
    const long limit = (source_length < capacity) ? source_length : capacity;

    for (long pos = 0; pos < limit; pos++) {
        int ch = fgetc(file);
        if (ch == EOF)
            break;              // file shorter than announced: keep zeros
        const long within = pos % 16;
        plaintext[pos / 16].item[within % 4][within / 4] = (unsigned char)ch;
    }

    fclose(file);
    return plaintext;
}
// Builds the 11 round keys from the built-in cipher key.
// Returns an array of 11 Blocks allocated with new[]; element 0 is the cipher
// key itself and element k is the key for round k.
__host__ Block * key_scheduling() {
    // Cipher key, one 4-byte word per column; read column by column the
    // bytes are the ASCII text "Thats my Kung Fu".
    static const unsigned char cipherKey[4][4] = {
        { 0x54, 0x73, 0x20, 0x67 },
        { 0x68, 0x20, 0x4b, 0x20 },
        { 0x61, 0x6d, 0x75, 0x46 },
        { 0x74, 0x79, 0x6e, 0x75 } };

    Block *schedule = new Block[11];

    for (int row = 0; row < 4; ++row)
        for (int col = 0; col < 4; ++col)
            schedule[0].item[row][col] = cipherKey[row][col];

    for (int round = 1; round <= 10; ++round) {
        const Block &prev = schedule[round - 1];
        Block &next = schedule[round];

        // First column: last column of the previous key, rotated up by one
        // byte and substituted, then XORed with the previous first column
        // and this round's constant.
        for (int row = 0; row < 4; ++row) {
            const unsigned char substituted = Sbox1[prev.item[(row + 1) % 4][3]];
            next.item[row][0] = substituted ^ prev.item[row][0] ^ Rcon[row][round - 1];
        }

        // Remaining columns: column to the left (new key) XOR same column (old key).
        for (int col = 1; col < 4; ++col)
            for (int row = 0; row < 4; ++row)
                next.item[row][col] = next.item[row][col - 1] ^ prev.item[row][col];
    }

    return schedule;
}
// Dumps one block to stderr as four lines of four hexadecimal bytes.
__host__ void printBlock(Block b) {
    for (int row = 0; row < 4; ++row) {
        const unsigned char *cells = b.item[row];
        for (int col = 0; col < 4; ++col)
            fprintf(stderr, " %0x ", cells[col]);
        fputc('\n', stderr);
    }
}
// Dumps `num_of_blocks` blocks to stderr as characters, four per line,
// with a dashed separator line after every block.
__host__ void printBlocks(Block * in, int num_of_blocks) {
    const size_t count = num_of_blocks;
    for (size_t blk = 0; blk < count; ++blk) {
        const Block &current = in[blk];
        for (int row = 0; row < 4; ++row) {
            for (int col = 0; col < 4; ++col)
                fprintf(stderr, " %c", current.item[row][col]);
            fputc('\n', stderr);
        }
        fputs("-----------------\n", stderr);
    }
}
// Writes `num_of_blocks` blocks to `filename` as raw bytes, each block
// column by column (item[0][c], item[1][c], ... for c = 0..3).
// The file is opened in binary mode so no byte of the data is translated,
// and it is closed before returning so the data is flushed.
// Failures are reported on stderr.
__host__ void writeToFile(Block * in, int num_of_blocks, const char * filename) {
    FILE * file = fopen(filename, "wb");
    if (!file) {
        fprintf(stderr, "Error opening file %s ", filename);
        return;
    }

    for (int t = 0; t < num_of_blocks; t++)
        for (int col = 0; col < 4; col++)
            for (int row = 0; row < 4; row++)
                fputc(in[t].item[row][col], file);

    if (fclose(file) != 0)
        fprintf(stderr, "Error writing file %s ", filename);
}
// Debug dump: writes every byte of `num_of_blocks` blocks to `filename`,
// one "<running index>. <character>" line per byte, each block column by column.
// The file is closed before returning; failures are reported on stderr.
__host__ void writeToFile2(Block * in, int num_of_blocks, const char * filename) {
    FILE * file = fopen(filename, "w");
    if (!file) {
        fprintf(stderr, "Error opening file %s ", filename);
        return;
    }

    int k = 0;   // running byte index across all blocks
    for (int t = 0; t < num_of_blocks; t++)
        for (int col = 0; col < 4; col++)
            for (int row = 0; row < 4; row++) {
                fprintf(file, "%d. %c\n", k, in[t].item[row][col]);
                k++;
            }

    if (fclose(file) != 0)
        fprintf(stderr, "Error writing file %s ", filename);
}
// Fills the unused tail of the last block with a character read from stdin.
// plaintext_length % 16 bytes of the last block are in use; every position
// p after them is set at item[p % 4][p / 4], matching the column-by-column
// fill order of the loader.
// Does nothing (and does not prompt) when there is no block or when the
// length is a multiple of 16, so a full block is never overwritten.
__host__ void padding(Block* plaintext, int num_of_blocks, int plaintext_length) {
    const int used = plaintext_length % 16;
    if (plaintext == NULL || num_of_blocks <= 0 || used <= 0)
        return;

    fprintf(stderr, "\nChar for padding? ");
    const unsigned char fill = (unsigned char)getchar();
    fprintf(stderr, "\n");

    Block &last = plaintext[num_of_blocks - 1];
    for (int pos = used; pos < 16; pos++)
        last.item[pos % 4][pos / 4] = fill;
}
// Multiplies byte `value` by a MixColumns coefficient in GF(2^8)
// (reduction constant 0x1b). Coefficients 1, 2 and 3 are supported;
// any other coefficient contributes 0.
static __host__ __device__ unsigned int gfMulSmall(unsigned int coeff, unsigned int value) {
    value &= 0xff;
    // value * 2: shift left and reduce when the top bit falls out of the byte.
    const unsigned int doubled = ((value << 1) ^ ((value & 0x80) ? 0x1b : 0x00)) & 0xff;
    switch (coeff)
    {
    case 1:  return value;
    case 2:  return doubled;
    case 3:  return doubled ^ value;
    default: return 0;
    }
}

// Computes one output byte of MixColumns: the GF(2^8) dot product of the
// coefficient row (m0..m3) with the state column (c0..c3).
// The result is always a single byte (0..255).
// Also compiled for the host so it can be checked against reference values.
__host__ __device__ unsigned int mixColumns(unsigned int m0,
                                            unsigned int m1,
                                            unsigned int m2,
                                            unsigned int m3,
                                            unsigned int c0,
                                            unsigned int c1,
                                            unsigned int c2,
                                            unsigned int c3) {
    return gfMulSmall(m0, c0) ^ gfMulSmall(m1, c1) ^ gfMulSmall(m2, c2) ^ gfMulSmall(m3, c3);
}
// Computes one output byte of the inverse MixColumns step: the GF(2^8) dot
// product of a coefficient row with a state column.
//   in0..in3 - the four coefficients (a row of the inverse matrix)
//   p0..p3   - the four bytes of the state column
// The result is masked to a single byte, so it no longer depends on the
// caller truncating it.
__host__ __device__ unsigned int inverseMixColumns(unsigned int in0,
                                                   unsigned int in1,
                                                   unsigned int in2,
                                                   unsigned int in3,
                                                   unsigned int p0,
                                                   unsigned int p1,
                                                   unsigned int p2,
                                                   unsigned int p3) {
    const unsigned int sum = Multiply(p0, in0) ^ Multiply(p1, in1) ^ Multiply(p2, in2) ^ Multiply(p3, in3);
    return sum & 0xff;
}
// AES-128 encryption kernel: one CUDA thread block encrypts one 16-byte Block,
// one thread per state byte.
//
// Launch requirements:
//   - blockDim must be (4, 4): threadIdx.x is the state row, threadIdx.y the column.
//   - The grid must supply at least num_of_blocks thread blocks; the data block
//     index is blockIdx.y * gridDim.x + blockIdx.x, so a (1, N) grid works as
//     well as a 2D grid.
//   - Uses 16 bytes of static shared memory per thread block.
//
// keys       - 11 round keys (device memory), keys[0] being the cipher key
// plaintext  - num_of_blocks input blocks (device memory)
// ciphertext - num_of_blocks output blocks (device memory)
__global__ void encrypt(Block *keys, Block *plaintext, Block *ciphertext, unsigned int num_of_blocks)
{
    const int row = threadIdx.x;
    const int col = threadIdx.y;
    const unsigned int blockNumber = blockIdx.y * gridDim.x + blockIdx.x;

    // Uniform for the whole thread block, so returning before the barriers is safe.
    if (blockNumber >= num_of_blocks)
        return;

    // Exchange buffer: a thread publishes its byte here, waits at a barrier,
    // and only then reads bytes owned by other threads. Updating the state in
    // place without this separation lets a thread read a byte another thread
    // has already overwritten.
    __shared__ unsigned char state[4][4];

    // Initial round key.
    unsigned char value = plaintext[blockNumber].item[row][col] ^ keys[0].item[row][col];

    for (int round = 1; round <= 10; round++)
    {
        // SubBytes, then publish for the row/column exchanges below.
        value = Sbox_dev[value];
        state[row][col] = value;
        __syncthreads();

        if (round < 10)
        {
            // ShiftRows + MixColumns: after ShiftRows, column `col` holds
            // state[r][(col + r) % 4] in row r.
            value = (unsigned char)mixColumns(
                MixCol_dev[row][0],
                MixCol_dev[row][1],
                MixCol_dev[row][2],
                MixCol_dev[row][3],
                state[0][col],
                state[1][(col + 1) & 3],
                state[2][(col + 2) & 3],
                state[3][(col + 3) & 3]);
        }
        else
        {
            // Final round: ShiftRows only.
            value = state[row][(col + row) & 3];
        }
        // Everyone must be done reading before the next round overwrites `state`.
        __syncthreads();

        // AddRoundKey.
        value ^= keys[round].item[row][col];
    }

    ciphertext[blockNumber].item[row][col] = value;
}
// AES-128 decryption kernel: one CUDA thread block decrypts one 16-byte Block,
// one thread per state byte.
//
// Launch requirements:
//   - blockDim must be (4, 4): threadIdx.x is the state row, threadIdx.y the column.
//   - The grid must supply at least num_of_blocks thread blocks; the data block
//     index is blockIdx.y * gridDim.x + blockIdx.x, so a (1, N) grid works as
//     well as a 2D grid.
//   - Uses 16 bytes of static shared memory per thread block.
//
// keys       - 11 round keys (device memory), keys[0] being the cipher key
// plaintext  - num_of_blocks output blocks (device memory)
// ciphertext - num_of_blocks input blocks (device memory)
__global__ void decrypt(Block *keys, Block *plaintext, Block *ciphertext, unsigned int num_of_blocks) {
    const int row = threadIdx.x;
    const int col = threadIdx.y;
    const unsigned int blockNumber = blockIdx.y * gridDim.x + blockIdx.x;

    // Uniform for the whole thread block, so returning before the barriers is safe.
    if (blockNumber >= num_of_blocks)
        return;

    // Exchange buffer: a thread publishes its byte here, waits at a barrier,
    // and only then reads bytes owned by other threads. Updating the state in
    // place without this separation lets a thread read a byte another thread
    // has already overwritten.
    __shared__ unsigned char state[4][4];

    // Undo the last round key.
    unsigned char value = ciphertext[blockNumber].item[row][col] ^ keys[10].item[row][col];

    for (int round = 9; round >= 0; round--)
    {
        // Inverse ShiftRows (row r rotated right by r) followed by inverse SubBytes.
        state[row][col] = value;
        __syncthreads();
        value = InvSbox_dev[state[row][(col - row + 4) & 3]];
        __syncthreads();    // all reads done before `state` is reused

        // Undo this round's key.
        value ^= keys[round].item[row][col];

        // Inverse MixColumns for every round except the one using keys[0].
        if (round > 0)
        {
            state[row][col] = value;
            __syncthreads();
            value = (unsigned char)inverseMixColumns(
                InvMixCol_dev[row][0],
                InvMixCol_dev[row][1],
                InvMixCol_dev[row][2],
                InvMixCol_dev[row][3],
                state[0][col],
                state[1][col],
                state[2][col],
                state[3][col]);
            __syncthreads();    // all reads done before the next round writes
        }
    }

    plaintext[blockNumber].item[row][col] = value;
}
// Prints `what` plus the CUDA error text on stderr when `status` is a failure.
// Returns true when `status` is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s (%s)\n", what, cudaGetErrorString(status));
    return false;
}

// Encrypts test5.txt on the GPU, decrypts the result again, reports both
// kernel times and writes kriptovano.txt / dekriptovano.txt.
// Returns 0 on success and 1 when any step fails.
int main()
{
    // Everything that needs cleanup is declared and initialised here, before
    // the first `goto Cleanup`, so no jump skips an initialisation and the
    // cleanup code can release every resource unconditionally.
    int exitCode = 1;
    char inputPath[] = "test5.txt";
    char encryptedPath[] = "kriptovano.txt";
    char decryptedPath[] = "dekriptovano.txt";
    cudaDeviceProp deviceProp;
    Block *keys = NULL;
    Block *plaintext = NULL;
    Block *ciphertext = NULL;
    Block *plaintext2 = NULL;
    Block *keys_dev = NULL;
    Block *plaintext_dev = NULL;
    Block *ciphertext_dev = NULL;
    Block *plaintext2_dev = NULL;
    cudaEvent_t startEvent = NULL;
    cudaEvent_t stopEvent = NULL;
    long plaintext_length = 0;
    long num_of_blocks = 0;
    size_t bytes = 0;
    float timeEnc = 0.0f;
    float timeDec = 0.0f;
    dim3 threadsPerBlock(4, 4);
    dim3 numBlocks(1, 1);

    if (!cudaSucceeded(cudaSetDevice(0), "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?"))
        goto Cleanup;

    //----------------------- device properties ------------------------------------
    if (!cudaSucceeded(cudaGetDeviceProperties(&deviceProp, 0), "cudaGetProp failed!"))
        goto Cleanup;

    fprintf(stderr, "Device name: %s\n", deviceProp.name);
    fprintf(stderr, "Major revision number: %d\n", deviceProp.major);
    fprintf(stderr, "Minor revision Number: %d\n", deviceProp.minor);
    fprintf(stderr, "Memory clock rate: %d MHz\n", deviceProp.memoryClockRate / 1000);
    fprintf(stderr, "Clock Rate: %d MHz\n", deviceProp.clockRate / 1000);
    // The three memory sizes below are size_t, hence %zu.
    fprintf(stderr, "Total Global Memory: %zu MB\n", deviceProp.totalGlobalMem / 1024 / 1024);
    fprintf(stderr, "L2 Cache memory size: %d KB\n", deviceProp.l2CacheSize / 1024);
    fprintf(stderr, "Total shared mem per block: %zu KB\n", deviceProp.sharedMemPerBlock / 1024);
    fprintf(stderr, "Total const mem size: %zu KB\n", deviceProp.totalConstMem / 1024);
    fprintf(stderr, "Warp size: %d\n", deviceProp.warpSize);
    fprintf(stderr, "Maximum block dimensions: %d x %d x %d\n", deviceProp.maxThreadsDim[0],
        deviceProp.maxThreadsDim[1],
        deviceProp.maxThreadsDim[2]);
    fprintf(stderr, "Maximum grid dimensions: %d x %d x %d\n", deviceProp.maxGridSize[0],
        deviceProp.maxGridSize[1],
        deviceProp.maxGridSize[2]);
    fprintf(stderr, "Number of muliprocessors: %d\n", deviceProp.multiProcessorCount);
    fprintf(stderr, "Max threads per block: %d\n", deviceProp.maxThreadsPerBlock);
    fprintf(stderr, "Supports conncurent kernels: %s\n\n\n", (deviceProp.concurrentKernels == 1) ? "Yes" : "No");

    //--------------- initialization ---------------------
    keys = key_scheduling();
    plaintext_length = file_length(inputPath);
    num_of_blocks = (plaintext_length + 15) / 16;   // round up to whole blocks
    fprintf(stderr, "Plaintext length: %ld characters \n", plaintext_length);

    if (num_of_blocks <= 0) {
        fprintf(stderr, "Input file %s is missing or empty, nothing to encrypt.\n", inputPath);
        goto Cleanup;
    }
    // The kernels are launched on a (1, num_of_blocks) grid.
    if (num_of_blocks > deviceProp.maxGridSize[1]) {
        fprintf(stderr, "Input needs %ld blocks, but the grid is limited to %d.\n",
            num_of_blocks, deviceProp.maxGridSize[1]);
        goto Cleanup;
    }

    fprintf(stderr, "Reading file and initializing....\n\n");
    plaintext = plaintext_initialization(inputPath, plaintext_length, (int)num_of_blocks);
    if (!plaintext) {
        fprintf(stderr, "Reading plaintext blocks failed!\n");
        goto Cleanup;
    }
    if (plaintext_length % 16 != 0) {
        padding(plaintext, (int)num_of_blocks, (int)plaintext_length);
    }
    ciphertext = new Block[num_of_blocks];
    plaintext2 = new Block[num_of_blocks];
    bytes = (size_t)num_of_blocks * sizeof(Block);

    //--------------- device memory allocation -------------------
    if (!cudaSucceeded(cudaMalloc((void**)&keys_dev, 11 * sizeof(Block)), "Allocating memory for key blocks failed!"))
        goto Cleanup;
    if (!cudaSucceeded(cudaMalloc((void**)&plaintext_dev, bytes), "Allocating memory for plaintext blocks failed!"))
        goto Cleanup;
    if (!cudaSucceeded(cudaMalloc((void**)&ciphertext_dev, bytes), "Allocating memory for ciphertext blocks failed!"))
        goto Cleanup;
    if (!cudaSucceeded(cudaMalloc((void**)&plaintext2_dev, bytes), "Allocating memory for decrypted text blocks failed!"))
        goto Cleanup;

    //------------------ copying block from host to device -----------------
    // Only the inputs are uploaded; both output buffers are written in full
    // by the kernels.
    if (!cudaSucceeded(cudaMemcpy(keys_dev, keys, 11 * sizeof(Block), cudaMemcpyHostToDevice), "Copying key blocks on device failed!"))
        goto Cleanup;
    if (!cudaSucceeded(cudaMemcpy(plaintext_dev, plaintext, bytes, cudaMemcpyHostToDevice), "Copying plaintext blocks on device failed!"))
        goto Cleanup;

    //---------------------- executing kernel -------------------------------
    numBlocks = dim3(1, (unsigned int)num_of_blocks);
    if (!cudaSucceeded(cudaEventCreate(&startEvent), "Creating timing event failed!"))
        goto Cleanup;
    if (!cudaSucceeded(cudaEventCreate(&stopEvent), "Creating timing event failed!"))
        goto Cleanup;

    //------------------ encryption --------------------------------------
    fprintf(stderr, "Encryption.........\n\n");
    cudaEventRecord(startEvent, 0);
    encrypt<<<numBlocks, threadsPerBlock>>>(keys_dev, plaintext_dev, ciphertext_dev, (unsigned int)num_of_blocks);
    // Launch errors are only visible through cudaGetLastError.
    if (!cudaSucceeded(cudaGetLastError(), "Launching encryption kernel failed!"))
        goto Cleanup;
    cudaEventRecord(stopEvent, 0);
    if (!cudaSucceeded(cudaEventSynchronize(stopEvent), "Encryption kernel failed!"))
        goto Cleanup;
    cudaEventElapsedTime(&timeEnc, startEvent, stopEvent);
    fprintf(stderr, "Encryption time %.2f ms\n", timeEnc);

    //------------------ decryption --------------------------------------
    fprintf(stderr, "Decryption.......\n\n");
    cudaEventRecord(startEvent, 0);
    decrypt<<<numBlocks, threadsPerBlock>>>(keys_dev, plaintext2_dev, ciphertext_dev, (unsigned int)num_of_blocks);
    if (!cudaSucceeded(cudaGetLastError(), "Launching decryption kernel failed!"))
        goto Cleanup;
    cudaEventRecord(stopEvent, 0);
    if (!cudaSucceeded(cudaEventSynchronize(stopEvent), "Decryption kernel failed!"))
        goto Cleanup;
    cudaEventElapsedTime(&timeDec, startEvent, stopEvent);
    fprintf(stderr, "Decryption time %.2f ms\n", timeDec);

    //------------- copying from device to host -----------------------------
    if (!cudaSucceeded(cudaMemcpy(ciphertext, ciphertext_dev, bytes, cudaMemcpyDeviceToHost), "Copying ciphertext blocks on host failed!"))
        goto Cleanup;
    if (!cudaSucceeded(cudaMemcpy(plaintext2, plaintext2_dev, bytes, cudaMemcpyDeviceToHost), "Copying decrypted blocks on host failed!"))
        goto Cleanup;

    //-----------------------------------------------------------------------
    fprintf(stderr, "Writing to files....\n\n");
    writeToFile(ciphertext, (int)num_of_blocks, encryptedPath);
    writeToFile(plaintext2, (int)num_of_blocks, decryptedPath);
    //writeToFile2(ciphertext, num_of_blocks, "kriptovanoCHAR.txt");
    //writeToFile2(plaintext2, num_of_blocks, "dekriptovanoCHAR.txt");
    exitCode = 0;

Cleanup:
    // Reached on success and on every failure; all handles start out NULL.
    if (startEvent)
        cudaEventDestroy(startEvent);
    if (stopEvent)
        cudaEventDestroy(stopEvent);
    cudaFree(keys_dev);
    cudaFree(plaintext_dev);
    cudaFree(ciphertext_dev);
    cudaFree(plaintext2_dev);
    delete[] keys;          // allocated with new[] by key_scheduling
    free(plaintext);        // allocated with malloc/calloc by plaintext_initialization
    delete[] ciphertext;
    delete[] plaintext2;

    fprintf(stderr, "\nPress enter to end......");
    getchar();
    getchar();
    return exitCode;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment