martinjacobd/jit.c

## jit.c
/*
 * Copyright (c) 2023 Jacob Martin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Please, please please please, take the lack of warranty seriously.
 * This program literally executes arbitrary code with no checking whatsoever.
 *
 * Takes hexadecimal bytes from stdin and executes them as machine code,
 * assuming they implement a function that takes no arguments
 * and returns an int
 * prints out the returned value as a hex number
 * Sample input on x86_64: echo b8ff000000c3 | ./jit
 * makes a function that returns 0xff
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <error.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>

/**
 * Parses ASCII hexadecimal text into raw bytes.
 *
 * Reads at most `len` characters from `str`, stopping early at a null
 * character, and stores each decoded byte in `buf`, which has room for
 * `size` bytes.
 *
 * For example, "0102" becomes { 0x01, 0x02 } regardless of the
 * endianness of the system. Bytes may be written as "0x0102...",
 * "0x01 0x02", "0102", "01 02", or a mix such as "01 0x02".
 *
 * Every byte must be given as exactly two hex digits: "0x1 0x20" and
 * "121" are errors because they leave a byte half specified. A "0x"
 * prefix with no digits after it is accepted and contributes no byte.
 *
 * Any character other than 0..9, a..f, A..F, ASCII whitespace
 * (space, \t, \n, \v, \f, \r), the 'x' of a "0x" prefix, and the null
 * character is an error.
 *
 * Because `len` bounds the read, a substring can be converted in place:
 * for "It is 0x01." pass str + 6 and len 4.
 *
 * @param str   text to parse; read up to `len` characters or the first
 *              null character, whichever comes first
 * @param len   maximum number of characters to read from `str`
 * @param buf   destination for the decoded bytes
 * @param size  capacity of `buf` in bytes
 * @return the number of bytes written to `buf`, or -1 if the input is
 *         malformed or decodes to more than `size` bytes. On error the
 *         contents of `buf` are unspecified, but no more than `size`
 *         bytes are ever written to it.
 */
ssize_t convert_hex_to_bytes(const char str[], size_t len,
                             unsigned char* buf, size_t size)
{
  enum char_type {
    INVALID = 0,
    ZERO,
    POSITIVE_DIGIT,
    EX,
    STRING_END,
    SPACE,
  };

  /* Classification of every possible input character; anything not
   * listed is INVALID (0). Static so the tables are built once rather
   * than on every call. */
  static const enum char_type char_table[256] = {
    ['0'] = ZERO,
    ['1' ... '9'] = POSITIVE_DIGIT,
    ['a' ... 'f'] = POSITIVE_DIGIT,
    ['A' ... 'F'] = POSITIVE_DIGIT,
    ['x'] = EX,
    ['\0'] = STRING_END,
    [' '] = SPACE,
    ['\n'] = SPACE,
    ['\t'] = SPACE,
    ['\r'] = SPACE,
    ['\v'] = SPACE,
    ['\f'] = SPACE
  };

  /* Numeric value of each hex digit. Only indexed after char_table has
   * classified the character as a digit, so the index is always < 128. */
  static const unsigned char number_table[128] = {
    ['0'] = 0,
    ['1'] = 1,
    ['2'] = 2,
    ['3'] = 3,
    ['4'] = 4,
    ['5'] = 5,
    ['6'] = 6,
    ['7'] = 7,
    ['8'] = 8,
    ['9'] = 9,
    ['a'] = 10,
    ['A'] = 10,
    ['b'] = 11,
    ['B'] = 11,
    ['c'] = 12,
    ['C'] = 12,
    ['d'] = 13,
    ['D'] = 13,
    ['e'] = 14,
    ['E'] = 14,
    ['f'] = 15,
    ['F'] = 15
  };

  enum {
    OUTSIDE_OF_BYTE, /* we have begun the string or have
                      * just read a low nibble */
    ZERO_BEGINNING,  /* we have read a 0 in possibly the high
                      * nibble position--
                      * ambiguous state, could be "0x" or "0F" etc. */
    HIGH_NIBBLE,     /* we have unambiguously read the high nibble */
  } state = OUTSIDE_OF_BYTE;

  size_t place_in_buf = 0;
  unsigned char current_byte = 0; /* high nibble already shifted into place */

  for (size_t i = 0; i < len; i++) {
    unsigned char c = str[i];

    switch (state) {
    case OUTSIDE_OF_BYTE:
      switch (char_table[c]) {
      case ZERO:
        state = ZERO_BEGINNING;
        break;

      case POSITIVE_DIGIT:
        current_byte = 16 * number_table[c];
        state = HIGH_NIBBLE;
        break;

      case STRING_END:
        goto NORMAL_RETURN;

      case SPACE:
        break;

      case INVALID: /* fallthrough */
      case EX:
        goto ERROR;
      }
      break;

    case ZERO_BEGINNING:
      switch (char_table[c]) {
      case ZERO: /* fallthrough */
      case POSITIVE_DIGIT:
        /* The slot at index `size` is one past the end of buf, so the
         * buffer is full as soon as place_in_buf reaches size. */
        if (place_in_buf >= size)
          goto ERROR;
        buf[place_in_buf++] = current_byte + number_table[c];
        current_byte = 0;
        state = OUTSIDE_OF_BYTE;
        break;

      case EX:
        /* The leading 0 was the start of a "0x" prefix, not a nibble. */
        state = OUTSIDE_OF_BYTE;
        break;

      case INVALID: /* fallthrough */
      case STRING_END: /* fallthrough */
      case SPACE:
        goto ERROR;
      }
      break;

    case HIGH_NIBBLE:
      switch (char_table[c]) {
      case ZERO: /* fallthrough */
      case POSITIVE_DIGIT:
        if (place_in_buf >= size)
          goto ERROR;
        buf[place_in_buf++] = current_byte + number_table[c];
        current_byte = 0;
        state = OUTSIDE_OF_BYTE;
        break;

      case INVALID: /* fallthrough */
      case EX: /* fallthrough */
      case STRING_END: /* fallthrough */
      case SPACE:
        goto ERROR;
      }
      break;
    }
  }

  /* Running out of input in the middle of a byte is an error. */
  if (state != OUTSIDE_OF_BYTE)
    goto ERROR;

 NORMAL_RETURN:
  return (ssize_t) place_in_buf;

 ERROR:
  return -1;
}

/**
 * Manual smoke test: converts the start of a sample string and prints
 * the decoded bytes on one line.
 *
 * Only the first 4 characters of the sample ("0x01") are handed to the
 * converter.
 * NOTE(review): the sample string is far longer than 4 characters --
 * confirm whether the short length is intentional.
 *
 * @return 0 on success, 1 if the conversion reported an error.
 */
int test(void)
{
  unsigned char buf[100];
  const char *sample = "0x01 02 03 0x04 05 06 07 08 09 0A 0b 0c 0D 0x0E 0F 0x10 0x21 0x22 0x35 0xAA 0xFF";
  /* Keep the result signed: the converter returns -1 on error, which
   * would turn into a huge loop bound if stored in a size_t. */
  ssize_t n_written = convert_hex_to_bytes(sample, 4, buf, sizeof buf);

  if (n_written < 0) {
    fprintf(stderr, "convert_hex_to_bytes failed\n");
    return 1;
  }

  for (ssize_t i = 0; i < n_written; i++)
    printf("%#04hhx ", buf[i]);
  printf("\n");
  return 0;
}

/**
 * Reads hex-encoded machine code from stdin, decodes it into a freshly
 * mapped page, makes that page executable and calls it as a function
 * taking no arguments and returning int, then prints the returned value
 * in hexadecimal.
 *
 * Lines that do not parse as hex are rejected and the user is prompted
 * again. The program exits with EXIT_FAILURE if the page cannot be mapped
 * or made executable, or if stdin ends or fails before a valid line is
 * read.
 */
int main(void)
{
  char *input_buffer = NULL;
  size_t input_buffer_len = 0;
  size_t write_region_size = getpagesize();
  unsigned char *write_region = mmap(NULL,
                                     write_region_size,
                                     PROT_READ | PROT_WRITE,
                                     MAP_SHARED | MAP_ANONYMOUS,
                                     -1,
                                     0);
  ssize_t bytes_written = -1;

  /* mmap signals failure with MAP_FAILED, never with NULL. */
  if (write_region == MAP_FAILED) {
    error_at_line(EXIT_FAILURE, errno,
                  __FILE__, __LINE__,
                  "Couldn't allocate exec page\n");
  }

  printf("Input bytes to execute (b8 ff 00 00 00 c3, for instance) ");

  for (;;) {
    ssize_t chars_read;

    /* The prompt has no trailing newline, so push it out explicitly. */
    fflush(stdout);

    /* Clear errno so a plain end-of-file is not reported with a stale
     * error code. */
    errno = 0;
    chars_read = getline(&input_buffer, &input_buffer_len, stdin);
    if (chars_read == -1) {
      error_at_line(EXIT_FAILURE, errno,
                    __FILE__, __LINE__,
                    "Error reading from stdin.");
    }

    bytes_written = convert_hex_to_bytes(input_buffer,
                                         (size_t) chars_read,
                                         write_region,
                                         write_region_size);
    if (bytes_written != -1)
      break;

    printf("Invalid input (allowed formats: 0x0A AA aF 01). Try again: ");
  }

  /* The text is no longer needed once it has been decoded. */
  free(input_buffer);
  input_buffer = NULL;

  /* Drop write permission before granting execute permission. */
  if (mprotect(write_region, write_region_size, PROT_READ | PROT_EXEC) == -1) {
    error_at_line(EXIT_FAILURE, errno,
                  __FILE__, __LINE__,
                  "Couldn't make page executable");
  }

  {
    int (*jmp_func)(void) = (void *) write_region;

    printf("Returned value: %x\n", (unsigned int) jmp_func());
  }

  munmap(write_region, write_region_size);

  return 0;
}
	/*
	* Copyright (c) 2023 Jacob Martin
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to deal
	* in the Software without restriction, including without limitation the rights
	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	* copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/

	/**
	* Please, please please please, take the lack of warranty seriously.
	* This program literally executes arbitrary code with no checking whatsoever.
	*
	* Takes hexadecimal bytes from stdin and executes them as machine code,
	* assuming they implement a function that takes no arguments
	* and returns an int
	* prints out the returned value as a hex number
	* Sample input on x86_64: echo b8ff000000c3 | ./jit
	* makes a function that returns 0xff
	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <error.h>
	#include <errno.h>
	#include <unistd.h>
	#include <sys/mman.h>

	/**
	 * Parses ASCII hexadecimal text into raw bytes.
	 *
	 * Reads at most `len` characters from `str`, stopping early at a null
	 * character, and stores each decoded byte in `buf`, which has room for
	 * `size` bytes.
	 *
	 * For example, "0102" becomes { 0x01, 0x02 } regardless of the
	 * endianness of the system. Bytes may be written as "0x0102...",
	 * "0x01 0x02", "0102", "01 02", or a mix such as "01 0x02".
	 *
	 * Every byte must be given as exactly two hex digits: "0x1 0x20" and
	 * "121" are errors because they leave a byte half specified. A "0x"
	 * prefix with no digits after it is accepted and contributes no byte.
	 *
	 * Any character other than 0..9, a..f, A..F, ASCII whitespace
	 * (space, \t, \n, \v, \f, \r), the 'x' of a "0x" prefix, and the null
	 * character is an error.
	 *
	 * Because `len` bounds the read, a substring can be converted in place:
	 * for "It is 0x01." pass str + 6 and len 4.
	 *
	 * @param str   text to parse; read up to `len` characters or the first
	 *              null character, whichever comes first
	 * @param len   maximum number of characters to read from `str`
	 * @param buf   destination for the decoded bytes
	 * @param size  capacity of `buf` in bytes
	 * @return the number of bytes written to `buf`, or -1 if the input is
	 *         malformed or decodes to more than `size` bytes. On error the
	 *         contents of `buf` are unspecified, but no more than `size`
	 *         bytes are ever written to it.
	 */
	ssize_t convert_hex_to_bytes(const char str[], size_t len,
	                             unsigned char* buf, size_t size)
	{
	  enum char_type {
	    INVALID = 0,
	    ZERO,
	    POSITIVE_DIGIT,
	    EX,
	    STRING_END,
	    SPACE,
	  };

	  /* Classification of every possible input character; anything not
	   * listed is INVALID (0). Static so the tables are built once rather
	   * than on every call. */
	  static const enum char_type char_table[256] = {
	    ['0'] = ZERO,
	    ['1' ... '9'] = POSITIVE_DIGIT,
	    ['a' ... 'f'] = POSITIVE_DIGIT,
	    ['A' ... 'F'] = POSITIVE_DIGIT,
	    ['x'] = EX,
	    ['\0'] = STRING_END,
	    [' '] = SPACE,
	    ['\n'] = SPACE,
	    ['\t'] = SPACE,
	    ['\r'] = SPACE,
	    ['\v'] = SPACE,
	    ['\f'] = SPACE
	  };

	  /* Numeric value of each hex digit. Only indexed after char_table has
	   * classified the character as a digit, so the index is always < 128. */
	  static const unsigned char number_table[128] = {
	    ['0'] = 0,
	    ['1'] = 1,
	    ['2'] = 2,
	    ['3'] = 3,
	    ['4'] = 4,
	    ['5'] = 5,
	    ['6'] = 6,
	    ['7'] = 7,
	    ['8'] = 8,
	    ['9'] = 9,
	    ['a'] = 10,
	    ['A'] = 10,
	    ['b'] = 11,
	    ['B'] = 11,
	    ['c'] = 12,
	    ['C'] = 12,
	    ['d'] = 13,
	    ['D'] = 13,
	    ['e'] = 14,
	    ['E'] = 14,
	    ['f'] = 15,
	    ['F'] = 15
	  };

	  enum {
	    OUTSIDE_OF_BYTE, /* we have begun the string or have
	                      * just read a low nibble */
	    ZERO_BEGINNING,  /* we have read a 0 in possibly the high
	                      * nibble position--
	                      * ambiguous state, could be "0x" or "0F" etc. */
	    HIGH_NIBBLE,     /* we have unambiguously read the high nibble */
	  } state = OUTSIDE_OF_BYTE;

	  size_t place_in_buf = 0;
	  unsigned char current_byte = 0; /* high nibble already shifted into place */

	  for (size_t i = 0; i < len; i++) {
	    unsigned char c = str[i];

	    switch (state) {
	    case OUTSIDE_OF_BYTE:
	      switch (char_table[c]) {
	      case ZERO:
	        state = ZERO_BEGINNING;
	        break;

	      case POSITIVE_DIGIT:
	        current_byte = 16 * number_table[c];
	        state = HIGH_NIBBLE;
	        break;

	      case STRING_END:
	        goto NORMAL_RETURN;

	      case SPACE:
	        break;

	      case INVALID: /* fallthrough */
	      case EX:
	        goto ERROR;
	      }
	      break;

	    case ZERO_BEGINNING:
	      switch (char_table[c]) {
	      case ZERO: /* fallthrough */
	      case POSITIVE_DIGIT:
	        /* The slot at index `size` is one past the end of buf, so the
	         * buffer is full as soon as place_in_buf reaches size. */
	        if (place_in_buf >= size)
	          goto ERROR;
	        buf[place_in_buf++] = current_byte + number_table[c];
	        current_byte = 0;
	        state = OUTSIDE_OF_BYTE;
	        break;

	      case EX:
	        /* The leading 0 was the start of a "0x" prefix, not a nibble. */
	        state = OUTSIDE_OF_BYTE;
	        break;

	      case INVALID: /* fallthrough */
	      case STRING_END: /* fallthrough */
	      case SPACE:
	        goto ERROR;
	      }
	      break;

	    case HIGH_NIBBLE:
	      switch (char_table[c]) {
	      case ZERO: /* fallthrough */
	      case POSITIVE_DIGIT:
	        if (place_in_buf >= size)
	          goto ERROR;
	        buf[place_in_buf++] = current_byte + number_table[c];
	        current_byte = 0;
	        state = OUTSIDE_OF_BYTE;
	        break;

	      case INVALID: /* fallthrough */
	      case EX: /* fallthrough */
	      case STRING_END: /* fallthrough */
	      case SPACE:
	        goto ERROR;
	      }
	      break;
	    }
	  }

	  /* Running out of input in the middle of a byte is an error. */
	  if (state != OUTSIDE_OF_BYTE)
	    goto ERROR;

	 NORMAL_RETURN:
	  return (ssize_t) place_in_buf;

	 ERROR:
	  return -1;
	}

	/**
	 * Manual smoke test: converts the start of a sample string and prints
	 * the decoded bytes on one line.
	 *
	 * Only the first 4 characters of the sample ("0x01") are handed to the
	 * converter.
	 * NOTE(review): the sample string is far longer than 4 characters --
	 * confirm whether the short length is intentional.
	 *
	 * @return 0 on success, 1 if the conversion reported an error.
	 */
	int test(void)
	{
	  unsigned char buf[100];
	  const char *sample = "0x01 02 03 0x04 05 06 07 08 09 0A 0b 0c 0D 0x0E 0F 0x10 0x21 0x22 0x35 0xAA 0xFF";
	  /* Keep the result signed: the converter returns -1 on error, which
	   * would turn into a huge loop bound if stored in a size_t. */
	  ssize_t n_written = convert_hex_to_bytes(sample, 4, buf, sizeof buf);

	  if (n_written < 0) {
	    fprintf(stderr, "convert_hex_to_bytes failed\n");
	    return 1;
	  }

	  for (ssize_t i = 0; i < n_written; i++)
	    printf("%#04hhx ", buf[i]);
	  printf("\n");
	  return 0;
	}

	/**
	 * Reads hex-encoded machine code from stdin, decodes it into a freshly
	 * mapped page, makes that page executable and calls it as a function
	 * taking no arguments and returning int, then prints the returned value
	 * in hexadecimal.
	 *
	 * Lines that do not parse as hex are rejected and the user is prompted
	 * again. The program exits with EXIT_FAILURE if the page cannot be mapped
	 * or made executable, or if stdin ends or fails before a valid line is
	 * read.
	 */
	int main(void)
	{
	  char *input_buffer = NULL;
	  size_t input_buffer_len = 0;
	  size_t write_region_size = getpagesize();
	  unsigned char *write_region = mmap(NULL,
	                                     write_region_size,
	                                     PROT_READ | PROT_WRITE,
	                                     MAP_SHARED | MAP_ANONYMOUS,
	                                     -1,
	                                     0);
	  ssize_t bytes_written = -1;

	  /* mmap signals failure with MAP_FAILED, never with NULL. */
	  if (write_region == MAP_FAILED) {
	    error_at_line(EXIT_FAILURE, errno,
	                  __FILE__, __LINE__,
	                  "Couldn't allocate exec page\n");
	  }

	  printf("Input bytes to execute (b8 ff 00 00 00 c3, for instance) ");

	  for (;;) {
	    ssize_t chars_read;

	    /* The prompt has no trailing newline, so push it out explicitly. */
	    fflush(stdout);

	    /* Clear errno so a plain end-of-file is not reported with a stale
	     * error code. */
	    errno = 0;
	    chars_read = getline(&input_buffer, &input_buffer_len, stdin);
	    if (chars_read == -1) {
	      error_at_line(EXIT_FAILURE, errno,
	                    __FILE__, __LINE__,
	                    "Error reading from stdin.");
	    }

	    bytes_written = convert_hex_to_bytes(input_buffer,
	                                         (size_t) chars_read,
	                                         write_region,
	                                         write_region_size);
	    if (bytes_written != -1)
	      break;

	    printf("Invalid input (allowed formats: 0x0A AA aF 01). Try again: ");
	  }

	  /* The text is no longer needed once it has been decoded. */
	  free(input_buffer);
	  input_buffer = NULL;

	  /* Drop write permission before granting execute permission. */
	  if (mprotect(write_region, write_region_size, PROT_READ | PROT_EXEC) == -1) {
	    error_at_line(EXIT_FAILURE, errno,
	                  __FILE__, __LINE__,
	                  "Couldn't make page executable");
	  }

	  {
	    int (*jmp_func)(void) = (void *) write_region;

	    printf("Returned value: %x\n", (unsigned int) jmp_func());
	  }

	  munmap(write_region, write_region_size);

	  return 0;
	}