Created
November 3, 2016 16:43
-
-
Save fador/3ab7d4f2c1ef4d262a49b7af0e088d6b to your computer and use it in GitHub Desktop.
arc4 hasher in OpenCL for tietoturvahaaste.fi challenge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Copyright (c) 2016, Marko Viitanen (Fador) | |
Permission to use, copy, modify, and/or distribute this software for any purpose | |
with or without fee is hereby granted, provided that the above copyright notice | |
and this permission notice appear in all copies. | |
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | |
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | |
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | |
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE | |
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
PERFORMANCE OF THIS SOFTWARE. | |
*/ | |
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include <string>

#include <Windows.h>

#include <CL/opencl.h>
// Pointer to the OpenCL program source text; main() points it at the loaded
// contents of "arc4_search.cl" before the program object is created.
char *arc4_brute = NULL;
/**
 * Returns a timestamp in microseconds taken from a monotonic clock.
 *
 * The value is only meaningful as a difference between two calls (the callers
 * in this file use it to measure elapsed time); its epoch is unspecified.
 * std::chrono::steady_clock is used on every platform so the result never goes
 * backwards and no platform-specific macro or API is needed.
 *
 * @return microseconds elapsed since the steady clock's epoch.
 */
unsigned long long microTime()
{
  using std::chrono::duration_cast;
  using std::chrono::microseconds;
  using std::chrono::steady_clock;

  const steady_clock::duration since_epoch = steady_clock::now().time_since_epoch();
  return static_cast<unsigned long long>(duration_cast<microseconds>(since_epoch).count());
}
int main(int argc, char **argv) | |
{ | |
FILE *src = NULL; | |
cl_int error; | |
cl_uint num; | |
cl_uint ok = 1; | |
cl_context context; | |
unsigned long long GPUstart; | |
unsigned long long GPUend; | |
unsigned long long CPUstart; | |
unsigned long long CPUend; | |
unsigned char *pBuf = NULL; | |
if (argc < 3) { | |
printf("Usage:\r\n"); | |
printf("arc4 <file> <keylen> (<platform ID> <start index>)\r\n"); | |
return 0; | |
} | |
src = fopen(argv[1], "rb"); | |
if (src == NULL) { | |
printf("Cannot open input file\r\n"); | |
return -1; | |
} | |
fseek(src, 0, SEEK_END); | |
size_t size = ftell(src); | |
rewind(src); | |
pBuf = (unsigned char *)calloc(size, 1); | |
if(fread(pBuf, 1, size, src) != size) | |
{ | |
printf("Failed reading the file!\r\n"); | |
return EXIT_FAILURE; | |
} | |
fclose(src); | |
cl_short keylen = atoi(argv[2]); | |
int selected_platform = -1; | |
if (argc >= 4) { | |
selected_platform = atoi(argv[3]); | |
} | |
uint64_t offset = 0; | |
if (argc >= 5) { | |
offset = atoll(argv[4]); | |
} | |
cl_platform_id platform = NULL; | |
// cl_context context; | |
cl_command_queue queue; | |
cl_device_id device; | |
// Platform | |
error = clGetPlatformIDs(0, NULL, &num); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error getting platform id: " << error << std::endl; | |
exit(error); | |
} | |
std::cout << "Number of platforms: " << num << std::endl; | |
cl_platform_id* platforms = new cl_platform_id[num]; | |
error = clGetPlatformIDs(num, platforms, NULL); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error getting device ids: " << error << std::endl; | |
exit(error); | |
} | |
char platformName[100]; | |
// Use given Platform ID or search for an AMD GPU | |
if (selected_platform != -1) { | |
if (selected_platform >= num) { | |
printf("Invalid platform ID\r\n"); | |
return EXIT_FAILURE; | |
} | |
clGetPlatformInfo(platforms[selected_platform], | |
CL_PLATFORM_VENDOR, | |
sizeof(platformName), | |
platformName, | |
NULL); | |
platform = platforms[selected_platform]; | |
printf("Selected platform: %s\r\n", platformName); | |
} else { | |
for (unsigned i = 0; i < num; ++i) { | |
error = clGetPlatformInfo(platforms[i], | |
CL_PLATFORM_VENDOR, | |
sizeof(platformName), | |
platformName, | |
NULL); | |
if (error != CL_SUCCESS) { | |
std::cout << "clGetPlatformInfo failed." << std::endl; | |
} | |
platform = platforms[i]; | |
if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) { | |
break; | |
} | |
} | |
} | |
if (platform == NULL) { | |
std::cout << "NULL platform found so Exiting Application." << std::endl; | |
exit(error); | |
} | |
cl_context_properties cps[3] = | |
{ | |
CL_CONTEXT_PLATFORM, | |
(cl_context_properties)platform, | |
0 | |
}; | |
// Context | |
context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &error); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error creating context: " << error << std::endl; | |
exit(error); | |
} | |
size_t deviceListSize = 0; | |
int status = 0; | |
status = clGetContextInfo( | |
context, | |
CL_CONTEXT_DEVICES, | |
0, | |
NULL, | |
&deviceListSize); | |
if (error != CL_SUCCESS) { | |
std::cout << "clGetContextInfo failed." << error << std::endl; | |
exit(error); | |
} | |
int deviceCount = (int)(deviceListSize / sizeof(cl_device_id)); | |
cl_device_id *devices; | |
devices = (cl_device_id *)malloc(deviceListSize); | |
/* Now, get the device list data */ | |
status = clGetContextInfo(context, | |
CL_CONTEXT_DEVICES, | |
deviceListSize, | |
devices, | |
NULL); | |
cl_command_queue_properties prop = 0; | |
// Command-queue | |
queue = clCreateCommandQueue(context, devices[0], prop, &error); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error creating command queue: " << error << std::endl; | |
exit(error); | |
} | |
std::ifstream in("arc4_search.cl"); | |
std::string contents((std::istreambuf_iterator<char>(in)), | |
std::istreambuf_iterator<char>()); | |
arc4_brute = (char *)contents.c_str(); | |
size_t progsize = strlen(arc4_brute); | |
cl_program program = clCreateProgramWithSource(context, 1, (const char **)&arc4_brute, &progsize, &error); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clCreateProgramWithSource: " << error << std::endl; | |
exit(error); | |
} | |
// Builds the program | |
error = clBuildProgram(program, 1, &devices[0], NULL, NULL, NULL); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clBuildProgram: " << error << std::endl; | |
char *build_log; | |
size_t ret_val_size; | |
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); | |
build_log = new char[ret_val_size + 1]; | |
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); | |
std::cout << build_log << std::endl; | |
exit(error); | |
} | |
size_t image_support; size_t image_support_size; | |
error = clGetDeviceInfo(devices[0], CL_DEVICE_IMAGE_SUPPORT, sizeof(size_t), &image_support, &image_support_size); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clGetDeviceInfo: " << error << std::endl; | |
exit(error); | |
} | |
size_t image_support_height; size_t image_support_height_size; | |
error = clGetDeviceInfo(devices[0], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image_support_height, &image_support_height_size); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clGetDeviceInfo: " << error << std::endl; | |
exit(error); | |
} | |
size_t image_support_width; size_t image_support_width_size; | |
error = clGetDeviceInfo(devices[0], CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image_support_width, &image_support_width_size); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clGetDeviceInfo: " << error << std::endl; | |
exit(error); | |
} | |
std::cout << "image2d support: " << image_support << " (" << image_support_width << "x" << image_support_height << ")" << std::endl; | |
// Extracting the kernel | |
cl_kernel arc4_search_k = clCreateKernel(program, "arc4_search", &error); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clCreateKernel: " << error << std::endl; | |
exit(error); | |
} | |
//Fetch information about compute device | |
size_t pref_workg_size_mult; | |
size_t max_workg_size; | |
size_t max_workit_sizes[3]; | |
clGetKernelWorkGroupInfo(arc4_search_k, devices[0], | |
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, | |
sizeof(size_t), (void*)&pref_workg_size_mult, NULL); | |
clGetDeviceInfo(devices[0], | |
CL_DEVICE_MAX_WORK_GROUP_SIZE, | |
sizeof(size_t), (void*)&max_workg_size, NULL); | |
clGetDeviceInfo(devices[0], | |
CL_DEVICE_MAX_WORK_ITEM_SIZES, | |
sizeof(size_t) * 3, (void*)max_workit_sizes, NULL); | |
std::cout << "CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: " << pref_workg_size_mult << std::endl; | |
std::cout << "CL_DEVICE_MAX_WORK_GROUP_SIZE: " << max_workg_size << std::endl; | |
std::cout << "CL_DEVICE_MAX_WORK_ITEM_SIZES: " << max_workit_sizes[0] << " " << max_workit_sizes[1] << " " << max_workit_sizes[2] << std::endl; | |
char* best_match_string = new char[64]; | |
cl_mem output_data_str = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 64*sizeof(cl_char), NULL, &error); | |
#define PAGE_LOCKED_BUF_SIZE 32 * 1024 * 1024 | |
cl_mem input_string = clCreateBuffer(context, CL_MEM_READ_ONLY, 64, NULL, &error); | |
cl_mem input_keyposition = clCreateBuffer(context, CL_MEM_READ_ONLY, 256*sizeof(cl_int), NULL, &error); | |
cl_mem page_locked_buffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status ); | |
char *page_locked_ptr = (char *)clEnqueueMapBuffer( queue, page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, | |
0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status ); | |
// Calculate how many actual key positions are used because of the broken algorithm in arc4 | |
unsigned char keyPositions[256][256] = { 0 }; | |
int keyLens[256] = { 0 }; | |
for (int keylen_t = 1; keylen_t < 256; keylen_t++) { | |
for (int i = 0; i < 256; i++) keyPositions[keylen_t][i & keylen_t] = 1; | |
for (int i = 0; i < keylen_t; i++) { | |
if (keyPositions[keylen_t][i]) keyLens[keylen_t]++; | |
} | |
} | |
int key_changable_positions[256] = { 0 }; | |
int cur_key_pos = 0; | |
for (int i = 0; i < keylen; i++) { | |
if (keyPositions[keylen][i]) key_changable_positions[cur_key_pos++] = i; | |
} | |
// Copy the crypted input to the page locked memory | |
memcpy(page_locked_ptr, pBuf, 41); | |
// Move page locked memory to the GPU input_string | |
error = clEnqueueWriteBuffer(queue, input_string, CL_TRUE, 0, 41, page_locked_ptr, 0, NULL, NULL ); | |
memcpy(page_locked_ptr, key_changable_positions, 256*sizeof(int)); | |
error = clEnqueueWriteBuffer(queue, input_keyposition, CL_TRUE, 0, 256*sizeof(cl_int), page_locked_ptr, 0, NULL, NULL); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clEnqueueWriteImage: " << error << std::endl; | |
exit(error); | |
} | |
// Number of work-items per work-group, use recommended | |
size_t local_ws[] = { max_workg_size }; | |
// Split work on multiple sets because GPU doesn't like to have too many work units at the same time | |
size_t global_ws[] = { 256 * 256 * 25 }; // Total number of work-items | |
printf("changable_len: %d\r\n", keyLens[keylen]); | |
uint64_t total_work = pow(36, keyLens[keylen]); | |
printf("total_work: %d\r\n", total_work); | |
bool more_work = true; | |
total_work -= offset; | |
int work_units = total_work / global_ws[0]; | |
// Enqueuing parameters | |
error = clSetKernelArg(arc4_search_k, 0, sizeof(keylen), &keylen); | |
error = clSetKernelArg(arc4_search_k, 1, sizeof(input_string), &input_string); | |
error |= clSetKernelArg(arc4_search_k, 2, sizeof(output_data_str), &output_data_str); | |
error |= clSetKernelArg(arc4_search_k, 3, sizeof(input_keyposition), &input_keyposition); | |
error |= clSetKernelArg(arc4_search_k, 4, sizeof(cl_int), &keyLens[keylen]); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clSetKernelArg: " << error << std::endl; | |
exit(error); | |
} | |
std::cout << "Worksize: " << global_ws[0] << std::endl; | |
// Do work on limited sets | |
while (more_work) { | |
GPUstart = microTime(); | |
// Update parameter for offset with each round | |
error |= clSetKernelArg(arc4_search_k, 5, sizeof(cl_ulong), &offset); | |
error = clEnqueueNDRangeKernel(queue, arc4_search_k, 1, NULL, global_ws, NULL, 0, NULL, NULL); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clEnqueueNDRangeKernel: " << error << std::endl; | |
exit(error); | |
} | |
// Reading back | |
error = clEnqueueReadBuffer(queue, output_data_str, CL_TRUE, 0, 41*sizeof(cl_char), best_match_string, 0, NULL, NULL); | |
if (error != CL_SUCCESS) { | |
std::cout << "Error clEnqueueReadBuffer: " << error << std::endl; | |
exit(error); | |
} | |
CPUstart = microTime(); | |
best_match_string[41] = 0; | |
std::cout << best_match_string << std::endl; | |
offset+= global_ws[0]; | |
if(!work_units) more_work = false; | |
std::cout << "Finished task " << offset << "/" << total_work << " (task " << work_units << ")" << std::endl; | |
work_units--; | |
if (!strncmp("http://", best_match_string, 7)) more_work = false; | |
GPUend = microTime(); | |
std::cout << (uint64_t)(global_ws[0]/(((double)GPUend - (double)GPUstart)/1000000.0)) << " hash/s" << std::endl; | |
} | |
// Cleaning up | |
clReleaseKernel(arc4_search_k); | |
clReleaseCommandQueue(queue); | |
clReleaseContext(context); | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Cipher state shared by init_arc4() and arc4(): the 256-entry substitution box. */
struct _arc4state
{
  unsigned char sbox[256];
};
typedef struct _arc4state arc4state;
/*
 * Key schedule: fills state->sbox with 0..255 and then permutes it with the key.
 *
 * The key byte for step n is taken from key[n & keylen] (a bitwise AND, not a
 * modulo), so only indices whose bits are a subset of keylen's bits are read;
 * that includes index keylen itself.
 *
 * state  - state whose sbox is (re)initialised.
 * key    - key bytes; must be readable at every index produced by n & keylen.
 * keylen - mask applied to the step counter to select the key byte.
 */
void init_arc4(arc4state *state, char *key, int keylen)
{
  /* Start from the identity permutation. */
  for(int n = 0; n < 256; ++n)
  {
    state->sbox[n] = n;
  }

  /* Swap each entry with the one selected by the running, key-dependent index. */
  int mix = 0;
  for(int n = 0; n < 256; ++n)
  {
    const unsigned char held = state->sbox[n];
    mix = (mix + held + key[n & keylen]) & 0xFF;
    state->sbox[n] = state->sbox[mix];
    state->sbox[mix] = held;
  }
}
/*
 * Decrypts up to buflen bytes of inbuf with the keystream produced from state
 * and stores the plaintext in output_data_str.
 *
 * The first seven plaintext bytes must spell "http://": each matching byte is
 * written to the output as it is produced, and the function returns at the
 * first mismatch. From the eighth byte on every byte is written unconditionally.
 *
 * state           - initialised cipher state; its sbox is modified.
 * inbuf           - ciphertext to decrypt.
 * buflen          - number of ciphertext bytes to process.
 * output_data_str - destination for the decrypted bytes.
 */
void arc4(arc4state *state, const global unsigned char *inbuf, int buflen, global unsigned char *output_data_str)
{
  const char match[7] = "http://";
  unsigned char i = 0;
  unsigned char j = 0;

  for(int pos = 0; pos < buflen; pos++)
  {
    /* Advance the keystream generator by one step (indices wrap at 256). */
    i++;
    const unsigned char m = state->sbox[i];
    j += (m + 1);
    const unsigned char u = state->sbox[j];
    state->sbox[i] = u;
    state->sbox[j] = m;
    const unsigned char t = (m + u);

    const unsigned char outval = inbuf[pos] ^ state->sbox[t];

    /* Inside the expected prefix a wrong byte ends the attempt. */
    if(pos < 7 && outval != match[pos]) {
      return;
    }
    output_data_str[pos] = outval;
  }
}
/*
 * Brute-force kernel: each work-item builds one candidate key from its global
 * index plus `offset`, runs the key schedule and attempts the decryption.
 *
 * The candidate number is expanded digit by digit in base 36; digit n picks
 * the character stored at key position key_changable_positions[n]. The
 * character table holds 37 entries, but digits are always below 36, so the
 * trailing '.' is never selected.
 *
 * keylength_under_testing           - key length passed on to init_arc4(); the
 *                                     key byte at this index is set to 0.
 * input_string                      - ciphertext (41 bytes are decrypted).
 * output_data_str                   - receives decrypted bytes from arc4().
 * key_changable_positions           - key indices to fill, one per digit.
 * current_brute_force_items_in_key  - number of digits / positions to fill.
 * offset                            - added to the global id to form the
 *                                     candidate number.
 */
__kernel void arc4_search(short keylength_under_testing,
                          const global unsigned char *input_string,
                          global unsigned char *output_data_str,
                          const global int *key_changable_positions,
                          int current_brute_force_items_in_key,
                          ulong offset) {
  char values[37] = "abcdefghijklmnopqrstuvwxyz0123456789.";
  arc4state state;
  char input_key[256];

  /* Candidate number of this work-item, consumed one base-36 digit at a time. */
  ulong remaining = get_global_id(0) + offset;
  int slot = 0;
  while(slot < current_brute_force_items_in_key) {
    input_key[key_changable_positions[slot]] = values[remaining % 36];
    remaining /= 36;
    slot++;
  }
  input_key[keylength_under_testing] = 0;

  init_arc4(&state, input_key, keylength_under_testing);
  arc4(&state, input_string, 41, output_data_str);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment