wch/as_json_numeric.R

## as_json_numeric.c
#include <R.h>
#include <Rdefines.h>

// Shorten a formatted number in place.
//
// - Zeros are removed only when they sit after a decimal point at the end of
//   the mantissa ("1.500" -> "1.5"). If nothing but the '.' is left after
//   that, the '.' is removed too ("100.000" -> "100").
// - Strings without a decimal point are left alone, so integer-looking output
//   ("100") and exponents ("1e+10") keep their zeros.
// - An exponent suffix ("e+10"), if present, is kept and moved up against the
//   trimmed mantissa ("1.50e+10" -> "1.5e+10").
// - If the result is exactly "-0", it is replaced with "0".
//
// str: buffer holding the number; must have room for a terminator at
//      str[len].
// len: number of characters in the number (not counting any terminator).
//
// A null terminator is written at the end of the trimmed string, and the new
// length is returned.
int strip_trailing_zeros(char* str, int len) {
  int dot = -1;         // Index of the decimal point, or -1 if there is none
  int exp_start = len;  // Index of 'e'/'E', or len if there is no exponent

  for (int i = 0; i < len; i++) {
    if (str[i] == '.' && dot < 0) {
      dot = i;
    } else if (str[i] == 'e' || str[i] == 'E') {
      exp_start = i;
      break;
    }
  }

  // Zeros are only insignificant when they follow a decimal point.
  if (dot >= 0) {
    int end = exp_start;  // Tentative end of the mantissa
    while (end > dot + 1 && str[end - 1] == '0')
      end--;

    // Nothing left after the decimal point: drop the point as well.
    if (end == dot + 1)
      end = dot;

    if (end < exp_start) {
      // Slide the exponent (if any) down over the removed characters.
      for (int i = exp_start; i < len; i++)
        str[end + (i - exp_start)] = str[i];
      len -= exp_start - end;
    }
  }

  // If the output was "-0", replace it with "0"
  if (len == 2 && str[0] == '-' && str[1] == '0') {
    str[0] = '0';
    len = 1;
  }

  str[len] = '\0';
  return len;
}

// Given a numeric vector, return a string that's formatted as a JSON array
// with all the values.
SEXP C_as_json_numeric_collapsed(SEXP x, SEXP digits, SEXP round, SEXP na) {
  if (!isReal(x) && !isInteger(x))
    error("x must be a numeric or integer vector.");
  if (!isInteger(digits) && !isReal(digits))
    error("digits must be a number.");
  if (!isString(round))
    error("round must be a string.");
  if (!isString(na))
    error("na must be a string.");

  int len = length(x);
  if (len == 0)
    return mkString("[]");

  // Do some things different for doubles vs int
  double* x_double;
  int* x_int;
  Rboolean is_int;
  if (isReal(x)) {
    x_double = REAL(x);
    is_int = FALSE;
  } else if (isInteger(x)) {
    x_int = INTEGER(x);
    is_int = TRUE;
  }

  // Set up the format string for snprintf
  char* format_specifier;
  if (strcmp(CHAR(asChar(round)), "decimal") == 0)
    format_specifier = "f";
  else if (strcmp(CHAR(asChar(round)), "signif") == 0)
    format_specifier = "g";
  else
    error("round must be either 'decimal' or 'signif'.");

  char format_str[20];
  snprintf(format_str, 20, "%%.%d%s", asInteger(digits), format_specifier);

  // How to handle NA's
  Rboolean na_null;
  if (strcmp(CHAR(asChar(na)), "string") == 0)
    na_null = FALSE;
  else if (strcmp(CHAR(asChar(na)), "null") == 0)
    na_null = TRUE;
  else
    error("na must be either 'string' or 'null'.");

  // Allocate a buffer for output
  int outlen = len * (asInteger(digits) + 4) + 100;
  char* out = (char*)malloc(outlen);

  int n = 0;    // Number of bytes used so far
  out[n] = '[';
  n++;

  const int max_len = 40; // Maximum length of string generated by snprintf
  int inc;      // How many characters were added in last iteration
  double num;   // Current numeric value

  for (int i = 0; i < len; i++) {
    // Grow if necessary - make sure there's enough space for the maximum
    // number string length, plus "]\0".
    if (n + max_len + 2 > outlen) {
      outlen = outlen * 1.5;
      out = (char*) realloc(out, outlen);
    }

    if (is_int) {
      // Need to explicitly set NA because (double)NA_INTEGER != NA_REAL
      if (x_int[i] == NA_INTEGER)
        num = NA_REAL;
      else
        num = (double) x_int[i];
    } else {
      num = x_double[i];
    }

    if (ISNA(num)) {
      if (na_null) {
        strcpy(out + n, "null");
        n += 4;
      } else {
        strcpy(out + n, "\"NA\"");
        n += 4;
      }
    } else if (ISNAN(num)) {
      if (na_null) {
        strcpy(out + n, "null");
        n += 4;
      } else {
        strcpy(out + n, "\"NaN\"");
        n += 5;
      }
    } else if (!R_FINITE(num)) {
      if (na_null) {
        strcpy(out + n, "null");
        n += 4;
      } else {
        if (num > 0) {
          strcpy(out + n, "\"Inf\"");
          n += 5;
        } else {
          strcpy(out + n, "\"-Inf\"");
          n += 6;
        }
      }

    } else {
      // Put the number string directly into the out buffer
      inc = snprintf((char*)(out + n), max_len, format_str, num);

      // Remove trailing 0's, if they're after a decimal point.
      inc = strip_trailing_zeros((char*)(out + n), inc);
      n += inc;
    }

    out[n] = ',';
    n++;
  }

  out[n-1] = ']';
  out[n] = '\0';

  SEXP outstring = PROTECT(mkString(out));
  free(out);
  UNPROTECT(1);
  return outstring;
}

## as_json_numeric.R
#' Format a numeric vector as a collapsed JSON array string
#'
#' Resolves `round` and `na` with [match.arg()] and hands everything to the
#' C routine `C_as_json_numeric_collapsed`.
#'
#' @param x A numeric or integer vector.
#' @param digits Number of digits used when formatting each value.
#' @param round Either `"signif"` (the default) or `"decimal"`.
#' @param na Either `"string"` (the default) or `"null"`.
#' @return The value returned by the C routine: a single string holding the
#'   JSON array.
#' @export
#' @useDynLib rspeed C_as_json_numeric_collapsed
as_json_numeric_collapsed <- function(x, digits = 5,
                                      round = c("signif", "decimal"),
                                      na = c("string", "null")) {
  # Pick the first choice when the caller left the default vector in place.
  rounding_mode <- match.arg(round)
  na_mode <- match.arg(na)

  .Call(
    C_as_json_numeric_collapsed,
    x,
    digits,
    rounding_mode,
    na_mode
  )
}

## results.R
# Demo and timing script for as_json_numeric_collapsed(). Lines starting with
# "#>" are the output recorded when the script was run.
library(rspeed)

# Timing: serialize one million random normal values with jsonlite and with
# as_json_numeric_collapsed().
x <- rnorm(1e6)
system.time(r1 <- jsonlite:::asJSON(x, digits=3))
#>    user  system elapsed
#>   0.906   0.014   0.935
system.time(r2 <- as_json_numeric_collapsed(x, 3))
#>    user  system elapsed
#>   0.256   0.002   0.258

# The two strings are not identical.
# NOTE(review): presumably because the call above uses the default
# round = "signif", while the jsonlite output below looks like decimal
# rounding - confirm.
identical(r1, r2)
#> [1] FALSE

# Misc. formatting options
# The vector mixes a large value, tiny values, zero and the non-finite
# values NA, NaN, Inf and -Inf.
x <- c(-2934273e6, 8e-8, 0, 0.001, -0.001, 0.0001, -0.0001, NA, NaN, Inf, -Inf)
jsonlite:::asJSON(x, digits=3)
#> [1] "[-2.934273e+12,0,0,0.001,-0.001,0,0,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
as_json_numeric_collapsed(x, 3, round = "decimal")
#> [1] "[-2934273000000,0,0,0.001,-0.001,0,0,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
as_json_numeric_collapsed(x, 3, round = "signif")
#> [1] "[-2.93e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
as_json_numeric_collapsed(x, 4)
#> [1] "[-2.934e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
# With na = "null", every non-finite value is written as null.
as_json_numeric_collapsed(x, 4, na = "null")
#> [1] "[-2.934e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,null,null,null,null]"

# Integer vectors
as_json_numeric_collapsed(c(-2L, 0L, 1L, NA_integer_))
#> [1] "[-2,0,1,\"NA\"]"
as_json_numeric_collapsed(c(-2L, 0L, 1L, NA_integer_), na = "null")
#> [1] "[-2,0,1,null]"

# Empty vectors
as_json_numeric_collapsed(numeric(0))
#> [1] "[]"
as_json_numeric_collapsed(integer(0))
#> [1] "[]"
	#include <R.h>
	#include <Rdefines.h>

	// Shorten a formatted number in place.
	//
	// - Zeros are removed only when they sit after a decimal point at the end of
	//   the mantissa ("1.500" -> "1.5"). If nothing but the '.' is left after
	//   that, the '.' is removed too ("100.000" -> "100").
	// - Strings without a decimal point are left alone, so integer-looking output
	//   ("100") and exponents ("1e+10") keep their zeros.
	// - An exponent suffix ("e+10"), if present, is kept and moved up against the
	//   trimmed mantissa ("1.50e+10" -> "1.5e+10").
	// - If the result is exactly "-0", it is replaced with "0".
	//
	// str: buffer holding the number; must have room for a terminator at
	//      str[len].
	// len: number of characters in the number (not counting any terminator).
	//
	// A null terminator is written at the end of the trimmed string, and the new
	// length is returned.
	int strip_trailing_zeros(char* str, int len) {
	  int dot = -1;         // Index of the decimal point, or -1 if there is none
	  int exp_start = len;  // Index of 'e'/'E', or len if there is no exponent

	  for (int i = 0; i < len; i++) {
	    if (str[i] == '.' && dot < 0) {
	      dot = i;
	    } else if (str[i] == 'e' || str[i] == 'E') {
	      exp_start = i;
	      break;
	    }
	  }

	  // Zeros are only insignificant when they follow a decimal point.
	  if (dot >= 0) {
	    int end = exp_start;  // Tentative end of the mantissa
	    while (end > dot + 1 && str[end - 1] == '0')
	      end--;

	    // Nothing left after the decimal point: drop the point as well.
	    if (end == dot + 1)
	      end = dot;

	    if (end < exp_start) {
	      // Slide the exponent (if any) down over the removed characters.
	      for (int i = exp_start; i < len; i++)
	        str[end + (i - exp_start)] = str[i];
	      len -= exp_start - end;
	    }
	  }

	  // If the output was "-0", replace it with "0"
	  if (len == 2 && str[0] == '-' && str[1] == '0') {
	    str[0] = '0';
	    len = 1;
	  }

	  str[len] = '\0';
	  return len;
	}

	// Given a numeric vector, return a string that's formatted as a JSON array
	// with all the values.
	SEXP C_as_json_numeric_collapsed(SEXP x, SEXP digits, SEXP round, SEXP na) {
	if (!isReal(x) && !isInteger(x))
	error("x must be a numeric or integer vector.");
	if (!isInteger(digits) && !isReal(digits))
	error("digits must be a number.");
	if (!isString(round))
	error("round must be a string.");
	if (!isString(na))
	error("na must be a string.");

	int len = length(x);
	if (len == 0)
	return mkString("[]");

	// Do some things different for doubles vs int
	double* x_double;
	int* x_int;
	Rboolean is_int;
	if (isReal(x)) {
	x_double = REAL(x);
	is_int = FALSE;
	} else if (isInteger(x)) {
	x_int = INTEGER(x);
	is_int = TRUE;
	}

	// Set up the format string for snprintf
	char* format_specifier;
	if (strcmp(CHAR(asChar(round)), "decimal") == 0)
	format_specifier = "f";
	else if (strcmp(CHAR(asChar(round)), "signif") == 0)
	format_specifier = "g";
	else
	error("round must be either 'decimal' or 'signif'.");

	char format_str[20];
	snprintf(format_str, 20, "%%.%d%s", asInteger(digits), format_specifier);

	// How to handle NA's
	Rboolean na_null;
	if (strcmp(CHAR(asChar(na)), "string") == 0)
	na_null = FALSE;
	else if (strcmp(CHAR(asChar(na)), "null") == 0)
	na_null = TRUE;
	else
	error("na must be either 'string' or 'null'.");

	// Allocate a buffer for output
	int outlen = len * (asInteger(digits) + 4) + 100;
	char* out = (char*)malloc(outlen);

	int n = 0; // Number of bytes used so far
	out[n] = '[';
	n++;

	const int max_len = 40; // Maximum length of string generated by snprintf
	int inc; // How many characters were added in last iteration
	double num; // Current numeric value

	for (int i = 0; i < len; i++) {
	// Grow if necessary - make sure there's enough space for the maximum
	// number string length, plus "]\0".
	if (n + max_len + 2 > outlen) {
	outlen = outlen * 1.5;
	out = (char*) realloc(out, outlen);
	}

	if (is_int) {
	// Need to explicitly set NA because (double)NA_INTEGER != NA_REAL
	if (x_int[i] == NA_INTEGER)
	num = NA_REAL;
	else
	num = (double) x_int[i];
	} else {
	num = x_double[i];
	}

	if (ISNA(num)) {
	if (na_null) {
	strcpy(out + n, "null");
	n += 4;
	} else {
	strcpy(out + n, "\"NA\"");
	n += 4;
	}
	} else if (ISNAN(num)) {
	if (na_null) {
	strcpy(out + n, "null");
	n += 4;
	} else {
	strcpy(out + n, "\"NaN\"");
	n += 5;
	}
	} else if (!R_FINITE(num)) {
	if (na_null) {
	strcpy(out + n, "null");
	n += 4;
	} else {
	if (num > 0) {
	strcpy(out + n, "\"Inf\"");
	n += 5;
	} else {
	strcpy(out + n, "\"-Inf\"");
	n += 6;
	}
	}

	} else {
	// Put the number string directly into the out buffer
	inc = snprintf((char*)(out + n), max_len, format_str, num);

	// Remove trailing 0's, if they're after a decimal point.
	inc = strip_trailing_zeros((char*)(out + n), inc);
	n += inc;
	}

	out[n] = ',';
	n++;
	}

	out[n-1] = ']';
	out[n] = '\0';

	SEXP outstring = PROTECT(mkString(out));
	free(out);
	UNPROTECT(1);
	return outstring;
	}
	#' Format a numeric vector as a collapsed JSON array string
	#'
	#' Resolves `round` and `na` with [match.arg()] and hands everything to the
	#' C routine `C_as_json_numeric_collapsed`.
	#'
	#' @param x A numeric or integer vector.
	#' @param digits Number of digits used when formatting each value.
	#' @param round Either `"signif"` (the default) or `"decimal"`.
	#' @param na Either `"string"` (the default) or `"null"`.
	#' @return The value returned by the C routine: a single string holding the
	#'   JSON array.
	#' @export
	#' @useDynLib rspeed C_as_json_numeric_collapsed
	as_json_numeric_collapsed <- function(x, digits = 5,
	                                      round = c("signif", "decimal"),
	                                      na = c("string", "null")) {
	  # Pick the first choice when the caller left the default vector in place.
	  rounding_mode <- match.arg(round)
	  na_mode <- match.arg(na)

	  .Call(
	    C_as_json_numeric_collapsed,
	    x,
	    digits,
	    rounding_mode,
	    na_mode
	  )
	}
	# Demo and timing script for as_json_numeric_collapsed(). Lines starting with
	# "#>" are the output recorded when the script was run.
	library(rspeed)
	# Timing: serialize one million random normal values with jsonlite and with
	# as_json_numeric_collapsed().
	x <- rnorm(1e6)
	system.time(r1 <- jsonlite:::asJSON(x, digits=3))
	#> user system elapsed
	#> 0.906 0.014 0.935
	system.time(r2 <- as_json_numeric_collapsed(x, 3))
	#> user system elapsed
	#> 0.256 0.002 0.258

	# The two strings are not identical.
	# NOTE(review): presumably because the call above uses the default
	# round = "signif", while the jsonlite output below looks like decimal
	# rounding - confirm.
	identical(r1, r2)
	#> [1] FALSE

	# Misc. formatting options
	# The vector mixes a large value, tiny values, zero and the non-finite
	# values NA, NaN, Inf and -Inf.
	x <- c(-2934273e6, 8e-8, 0, 0.001, -0.001, 0.0001, -0.0001, NA, NaN, Inf, -Inf)
	jsonlite:::asJSON(x, digits=3)
	#> [1] "[-2.934273e+12,0,0,0.001,-0.001,0,0,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
	as_json_numeric_collapsed(x, 3, round = "decimal")
	#> [1] "[-2934273000000,0,0,0.001,-0.001,0,0,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
	as_json_numeric_collapsed(x, 3, round = "signif")
	#> [1] "[-2.93e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
	as_json_numeric_collapsed(x, 4)
	#> [1] "[-2.934e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
	# With na = "null", every non-finite value is written as null.
	as_json_numeric_collapsed(x, 4, na = "null")
	#> [1] "[-2.934e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,null,null,null,null]"

	# Integer vectors
	as_json_numeric_collapsed(c(-2L, 0L, 1L, NA_integer_))
	#> [1] "[-2,0,1,\"NA\"]"
	as_json_numeric_collapsed(c(-2L, 0L, 1L, NA_integer_), na = "null")
	#> [1] "[-2,0,1,null]"

	# Empty vectors
	as_json_numeric_collapsed(numeric(0))
	#> [1] "[]"
	as_json_numeric_collapsed(integer(0))
	#> [1] "[]"