thass0/0-c-arrays.md

## 0-c-arrays.md

      
    Raw
  

              0-c-arrays.md
            
          
    The following programs demonstrate how VLAs can be used in some pretty cool ways in C.
They allow creating dynamic array types, with information about the size of the array being known only at runtime.

The first example shows how this kind of information can be attached to VLAs which are completely heap-allocated, and never touch the stack.
In the second example, the information attached to the type of the array is used in combination with UBSan to check for out of bounds access to the array at runtime!
Lastly, the third example demonstrates why we have to use this weird special syntax to correctly dereference and access pointers to VLAs.

Examples two and three make use of the fact that classic, fixed-size C arrays are interoperable with VLAs.
That is, given a fixed-size array, we're able to pass it and its size to a function that expects a VLA.
Thanks to this answer on HN, and
this post on Stack Overflow for providing helpful
information on the topic of VLAs and array bounds-checking in C.

  
## 1-heap-vla.c
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

/* `init_arr` and `print_arr` both take pointers to
   VLAs of size `n` instead of passing the VLA by value.
   This avoids ever creating a stack-allocated VLA. */

/* Fills the `n`-element array that `arr` points to with 1, 2, ..., n.
   The elements are written in place through the pointer, and every
   access goes through the VLA type so the length stays attached to it. */
void init_arr(unsigned n, int (*arr)[n]) {
  unsigned idx = 0;
  while (idx < n) {
    (*arr)[idx] = idx + 1;
    idx++;
  }
}

/* Writes the `n` elements of `*arr` to stdout on one line, separated
   by ", " and followed by a newline. */
void print_arr(unsigned n, int (*arr)[n]) {
  /* The first element is the only one without a leading separator. */
  if (n > 0) {
    printf("%d", (*arr)[0]);
  }
  for (unsigned idx = 1; idx < n; idx++) {
    printf(", %d", (*arr)[idx]);
  }
  printf("\n");
}

/* Demonstrates two ways of carrying a runtime array length in the type
   of a pointer to a heap-allocated VLA.
   Returns 0 on success, or EXIT_FAILURE if an allocation fails. */
int main(void) {
  {
    /* (1) Storing the array size next to the VLA. */

    /* `arr` is a pointer to a heap-allocated
       array consisting of `n` elements. The
       size of this array is encoded in its type.
       This is why `sizeof(*arr)` is equal to
       `sizeof(int) * n`. */

    unsigned n = 10;
    int (*arr)[n] = malloc(sizeof(*arr));
    if (arr == NULL) {
      /* Nothing else is allocated at this point, so bail out directly. */
      perror("malloc");
      return EXIT_FAILURE;
    }

    assert(sizeof(*arr) == sizeof(int) * n);

    init_arr(n, arr);
    print_arr(n, arr);

    free(arr);
  }

  {
    /* (2) Storing the array size in the type (local scope only). */

    /* Defines the type `Arr`. The length expression is evaluated
       once, when this typedef is reached, so `Arr` keeps the length
       `7` independently of any later value of `type_n`. */

    unsigned type_n = 7;
    typedef int Arr[type_n];

    Arr *arr = malloc(sizeof(*arr));
    if (arr == NULL) {
      /* The array from part (1) has already been freed. */
      perror("malloc");
      return EXIT_FAILURE;
    }

    /* Retrieve the length of the array from its type:
       total size of the array divided by the size of one element. */
    unsigned n = (unsigned)(sizeof(*arr) / sizeof(**arr));
    init_arr(n, arr);
    print_arr(n, arr);

    free(arr);
  }

  return 0;
}

## 2-bounds-check.c
/* Compile with `-fsanitize=undefined` and `-fno-sanitize-recover`
   to catch the out-of-bounds access and crash when it happens. */

#include <stdlib.h>
#include <stdio.h>

/* Prints element `idx` of the `n`-element array that `arr` points to,
   followed by a newline. */
void print_safe(unsigned n, int (*arr)[n], unsigned idx) {
  /* Indexing through the VLA type keeps the bound `n` attached to the
     access, so an out-of-bounds `idx` is caught by UBSan. */
  printf("%d\n", (*arr)[idx]);
}

/* Prints element `idx` of the array that `arr` points to, followed by
   a newline. The pointee is an array of unspecified length. */
void print_unsafe1(int (*arr)[], unsigned idx) {
  /* No length is part of the type here, so an out-of-bounds `idx`
     is not caught by UBSan. */
  printf("%d\n", (*arr)[idx]);
}

/* Reads one element past the end of a five-element array, first through
   the unchecked helper and then through the checked one. */
int main(void) {
  int values[] = { 1, 2, 3, 4, 5 };
  const unsigned len = sizeof(values) / sizeof(values[0]);

  /* Index `len` is deliberately one past the last valid index.
     The unchecked call comes first because the checked call is
     expected to abort the program when built with the sanitizer. */
  print_unsafe1(&values, len);
  print_safe(len, &values, len);

  return 0;
}

## 3-array-deref.c
/* Example of how to dereference one-dimensional arrays in C.
   Compile with `-fsanitize=address` to make the buffer overflow in `print2` crash this program. */

#include <stdlib.h>
#include <stdio.h>

/* Shows the correct way to read through a pointer to a VLA:
   prints the element at index `1` of the array `arr` points to. */
void print1(unsigned n, int (*arr)[n]) {
  /* The parentheses force the dereference to happen first:
     `*arr` is the `n`-element array, `[1]` then selects its second element. */
  printf("%d\n", (*arr)[1]);
}

/* Shows the WRONG way to read through a pointer to a VLA: `[]` binds
   tighter than unary `*`, so `*arr[1]` is parsed as `*(arr[1])`. */
void print2(unsigned n, int (*arr)[n]) {
  /* `arr[1]` treats `arr` as an array of `n`-element arrays and selects
     the second one; dereferencing that array yields its first element,
     i.e. the int located `n` elements after the start of `*arr`. */
  printf("%d\n", *arr[1]);
}

/* Runs the correct and the incorrect dereference side by side. */
int main(void) {
  int five[5] = { 1, 2, 3, 4, 5 };
  print1(5, &five);  /* Prints `2`. */
  /* print2(5, &five); */  /* Stack buffer overflow! */

  int six[6] = { 1, 2, 3, 4, 5, 6 };
  print1(5, &six);  /* Prints `2` again. */
  /* Prints `6`: `print2` reads the first element of the second array
     in what it assumes is an array of five-element arrays, which is
     `six[5]`. */
  print2(5, &six);

  return 0;
}
	#include <stdlib.h>
	#include <stdio.h>
	#include <assert.h>

	/* `init_arr` and `print_arr` both take pointers to
	VLAs of size `n` instead of passing the VLA by value.
	This avoids ever creating a stack-allocated VLA. */

	/* Stores 1, 2, ..., n into the `n`-element array behind `arr`.
	   Every write goes through the VLA type of the pointee. */
	void init_arr(unsigned n, int (*arr)[n]) {
	  for (unsigned pos = 0; pos != n; ++pos) {
	    unsigned value = pos + 1;
	    (*arr)[pos] = value;
	  }
	}

	/* Prints the `n` elements of `*arr` on one line of stdout,
	   comma-separated, and ends the line with a newline. */
	void print_arr(unsigned n, int (*arr)[n]) {
	  for (unsigned pos = 0; pos < n; pos++) {
	    /* Only elements after the first get a leading ", ". */
	    printf(pos == 0 ? "%d" : ", %d", (*arr)[pos]);
	  }
	  printf("\n");
	}

	/* Demonstrates two ways of carrying a runtime array length in the type
	   of a pointer to a heap-allocated VLA.
	   Returns 0 on success, or EXIT_FAILURE if an allocation fails. */
	int main(void) {
	  {
	    /* (1) Storing the array size next to the VLA. */

	    /* `arr` is a pointer to a heap-allocated
	       array consisting of `n` elements. The
	       size of this array is encoded in its type.
	       This is why `sizeof(*arr)` is equal to
	       `sizeof(int) * n`. */

	    unsigned n = 10;
	    int (*arr)[n] = malloc(sizeof(*arr));
	    if (arr == NULL) {
	      /* Nothing else is allocated at this point, so bail out directly. */
	      perror("malloc");
	      return EXIT_FAILURE;
	    }

	    assert(sizeof(*arr) == sizeof(int) * n);

	    init_arr(n, arr);
	    print_arr(n, arr);

	    free(arr);
	  }

	  {
	    /* (2) Storing the array size in the type (local scope only). */

	    /* Defines the type `Arr`. The length expression is evaluated
	       once, when this typedef is reached, so `Arr` keeps the length
	       `7` independently of any later value of `type_n`. */

	    unsigned type_n = 7;
	    typedef int Arr[type_n];

	    Arr *arr = malloc(sizeof(*arr));
	    if (arr == NULL) {
	      /* The array from part (1) has already been freed. */
	      perror("malloc");
	      return EXIT_FAILURE;
	    }

	    /* Retrieve the length of the array from its type:
	       total size of the array divided by the size of one element. */
	    unsigned n = (unsigned)(sizeof(*arr) / sizeof(**arr));
	    init_arr(n, arr);
	    print_arr(n, arr);

	    free(arr);
	  }

	  return 0;
	}
	/* Compile with `-fsanitize=undefined` and `-fno-sanitize-recover`
	to catch the out-of-bounds access and crash when it happens. */

	#include <stdlib.h>
	#include <stdio.h>

	/* Prints element `idx` of the `n`-element array that `arr` points to,
	   followed by a newline. The access goes through the VLA type, so
	   the bound `n` is part of the indexed expression. */
	void print_safe(unsigned n, int (*arr)[n], unsigned idx) {
	  int x = (*arr)[idx];  /* Out-of-bounds access caught by UBSan. */
	  printf("%d\n", x);
	}

	/* Prints element `idx` of the array that `arr` points to, followed by
	   a newline. The pointee is an array of unspecified length, so no
	   bound is part of the indexed expression. */
	void print_unsafe1(int (*arr)[], unsigned idx) {
	  int x = (*arr)[idx];  /* Not caught by UBSan. */
	  printf("%d\n", x);
	}

	/* Reads one element past the end of a five-element array, first through
	   the unchecked helper and then through the checked one. */
	int main(void) {
	  int values[] = { 1, 2, 3, 4, 5 };
	  const unsigned len = sizeof(values) / sizeof(values[0]);

	  /* Index `len` is deliberately one past the last valid index.
	     The unchecked call comes first because the checked call is
	     expected to abort the program when built with the sanitizer. */
	  print_unsafe1(&values, len);
	  print_safe(len, &values, len);

	  return 0;
	}
	/* Example of how to dereference one-dimensional arrays in C.
	Compile with `-fsanitize=address` to make the buffer overflow in `print2` crash this program. */

	#include <stdlib.h>
	#include <stdio.h>

	/* Shows the correct way to read through a pointer to a VLA:
	   prints the element at index `1` of the array `arr` points to. */
	void print1(unsigned n, int (*arr)[n]) {
	  /* The parentheses force the dereference to happen first:
	     `*arr` is the `n`-element array, `[1]` then selects its second element. */
	  printf("%d\n", (*arr)[1]);
	}

	/* Shows the WRONG way to read through a pointer to a VLA: `[]` binds
	   tighter than unary `*`, so `*arr[1]` is parsed as `*(arr[1])`. */
	void print2(unsigned n, int (*arr)[n]) {
	  /* `arr[1]` treats `arr` as an array of `n`-element arrays and selects
	     the second one; dereferencing that array yields its first element,
	     i.e. the int located `n` elements after the start of `*arr`. */
	  printf("%d\n", *arr[1]);
	}

	/* Runs the correct and the incorrect dereference side by side. */
	int main(void) {
	  int five[5] = { 1, 2, 3, 4, 5 };
	  print1(5, &five);  /* Prints `2`. */
	  /* print2(5, &five); */  /* Stack buffer overflow! */

	  int six[6] = { 1, 2, 3, 4, 5, 6 };
	  print1(5, &six);  /* Prints `2` again. */
	  /* Prints `6`: `print2` reads the first element of the second array
	     in what it assumes is an array of five-element arrays, which is
	     `six[5]`. */
	  print2(5, &six);

	  return 0;
	}