Skip to content

Instantly share code, notes, and snippets.

@wch
Last active August 29, 2015 14:06
Show Gist options
  • Save wch/562de64335cf986322d4 to your computer and use it in GitHub Desktop.
Save wch/562de64335cf986322d4 to your computer and use it in GitHub Desktop.
asJSON numeric collapsed
#include <R.h>
#include <Rdefines.h>
// Trim a printf-formatted number in place so that it is as short as possible.
//
// Given a string `str` holding `len` characters (terminator not counted):
// - If it contains a decimal point and no exponent, drop the trailing 0's
//   that come after the decimal point, and the decimal point itself when no
//   fractional digit remains ("1.500" -> "1.5", "2.000" -> "2").
// - Strings without a decimal point ("100") or with an exponent ("1e+10")
//   are left untouched, because their trailing 0's are significant.
// - If the result is "-0", replace it with "0".
// - Place a null terminator at the end of the trimmed string.
// - Return the new length of the string.
// A NULL `str` or a negative `len` is ignored and 0 is returned.
int strip_trailing_zeros(char* str, int len) {
  if (str == NULL || len < 0)
    return 0;

  // Find the decimal point. An exponent marker disables trimming altogether:
  // in "1e+10" the final 0 is part of the exponent, not of the fraction.
  int dot = -1;
  for (int i = 0; i < len; i++) {
    char c = str[i];
    if (c == 'e' || c == 'E') {
      dot = -1;
      break;
    }
    if (c == '.' && dot < 0)
      dot = i;
  }

  if (dot >= 0) {
    int end = len; // Tentative end of string while backing over 0's
    while (end > dot + 1 && str[end - 1] == '0')
      end--;
    // Nothing but 0's followed the decimal point: drop the point as well.
    if (end == dot + 1)
      end = dot;
    len = end;
  }

  // If the output was "-0", replace it with "0"
  if (len == 2 && str[0] == '-' && str[1] == '0') {
    str[0] = '0';
    len = 1;
  }

  str[len] = '\0';
  return len;
}
// Make sure the buffer *buf, whose capacity is *cap bytes, can hold at least
// `required` bytes, growing it in roughly 1.5x steps when it cannot.
// Returns 1 on success. On allocation failure returns 0 and leaves *buf and
// *cap untouched, so the caller still owns (and must free) the old buffer.
static int json_buf_reserve(char** buf, size_t* cap, size_t required) {
  if (required <= *cap)
    return 1;

  size_t new_cap = *cap;
  while (new_cap < required)
    new_cap += new_cap / 2 + 1;

  char* grown = (char*) realloc(*buf, new_cap);
  if (grown == NULL)
    return 0;

  *buf = grown;
  *cap = new_cap;
  return 1;
}

// Given a numeric vector, return a string that's formatted as a JSON array
// with all the values.
//
// Arguments:
//   x      - double or integer vector. An empty vector yields "[]".
//   digits - non-negative number used as the printf precision.
//   round  - "decimal" formats with "%.<digits>f", "signif" with
//            "%.<digits>g".
//   na     - "string" emits NA, NaN, Inf and -Inf as quoted strings;
//            "null" emits all four as null.
// Returns a length-1 character vector containing the JSON array.
// Raises an R error on invalid arguments or when memory cannot be allocated.
SEXP C_as_json_numeric_collapsed(SEXP x, SEXP digits, SEXP round, SEXP na) {
  if (!isReal(x) && !isInteger(x))
    error("x must be a numeric or integer vector.");
  if (!isInteger(digits) && !isReal(digits))
    error("digits must be a number.");
  if (!isString(round))
    error("round must be a string.");
  if (!isString(na))
    error("na must be a string.");

  int len = length(x);
  if (len == 0)
    return mkString("[]");

  // Do some things different for doubles vs int
  double* x_double = NULL;
  int* x_int = NULL;
  Rboolean is_int;
  if (isReal(x)) {
    x_double = REAL(x);
    is_int = FALSE;
  } else {
    x_int = INTEGER(x);
    is_int = TRUE;
  }

  // A missing or negative precision would produce a meaningless format
  // string and a bogus buffer size, so reject it up front.
  int n_digits = asInteger(digits);
  if (n_digits == NA_INTEGER || n_digits < 0)
    error("digits must be a non-negative number.");

  // Set up the format string for snprintf
  const char* format_specifier = NULL;
  if (strcmp(CHAR(asChar(round)), "decimal") == 0)
    format_specifier = "f";
  else if (strcmp(CHAR(asChar(round)), "signif") == 0)
    format_specifier = "g";
  else
    error("round must be either 'decimal' or 'signif'.");

  char format_str[20];
  snprintf(format_str, sizeof(format_str), "%%.%d%s", n_digits,
           format_specifier);

  // How to handle NA's
  Rboolean na_null = FALSE;
  if (strcmp(CHAR(asChar(na)), "string") == 0)
    na_null = FALSE;
  else if (strcmp(CHAR(asChar(na)), "null") == 0)
    na_null = TRUE;
  else
    error("na must be either 'string' or 'null'.");

  // Allocate a buffer for output. This is only an initial estimate; the loop
  // below grows the buffer whenever a value does not fit.
  size_t outlen = (size_t) len * ((size_t) n_digits + 4) + 100;
  char* out = (char*) malloc(outlen);
  if (out == NULL)
    error("could not allocate memory for the JSON output.");

  size_t n = 0; // Number of bytes used so far
  out[n++] = '[';

  // Invariant at the top of every iteration: n < outlen, so out + n is a
  // valid position with at least one free byte.
  for (int i = 0; i < len; i++) {
    double num; // Current numeric value
    if (is_int) {
      // Need to explicitly set NA because (double)NA_INTEGER != NA_REAL
      num = (x_int[i] == NA_INTEGER) ? NA_REAL : (double) x_int[i];
    } else {
      num = x_double[i];
    }

    // Non-finite values are emitted as fixed tokens. ISNA must be tested
    // before ISNAN because ISNAN is also true for NA.
    const char* token = NULL;
    if (ISNA(num))
      token = na_null ? "null" : "\"NA\"";
    else if (ISNAN(num))
      token = na_null ? "null" : "\"NaN\"";
    else if (!R_FINITE(num))
      token = na_null ? "null" : (num > 0 ? "\"Inf\"" : "\"-Inf\"");

    if (token != NULL) {
      size_t token_len = strlen(token);
      // Room for the token plus the separator and the final '\0'.
      if (!json_buf_reserve(&out, &outlen, n + token_len + 2)) {
        free(out);
        error("could not allocate memory for the JSON output.");
      }
      memcpy(out + n, token, token_len);
      n += token_len;
    } else {
      // Put the number string directly into the out buffer. snprintf returns
      // the length the full output needs, even when it had to truncate.
      int written = snprintf(out + n, outlen - n, format_str, num);
      if (written < 0) {
        free(out);
        error("could not format a number.");
      }
      if ((size_t) written + 2 > outlen - n) {
        // Not enough room for the number, the separator and the '\0':
        // grow the buffer and format again so nothing is truncated.
        if (!json_buf_reserve(&out, &outlen, n + (size_t) written + 2)) {
          free(out);
          error("could not allocate memory for the JSON output.");
        }
        written = snprintf(out + n, outlen - n, format_str, num);
        if (written < 0) {
          free(out);
          error("could not format a number.");
        }
      }
      // Remove trailing 0's, if they're after a decimal point.
      n += (size_t) strip_trailing_zeros(out + n, written);
    }

    out[n++] = ',';
  }

  // Turn the last separator into the closing bracket.
  out[n - 1] = ']';
  out[n] = '\0';

  SEXP outstring = PROTECT(mkString(out));
  free(out);
  UNPROTECT(1);
  return outstring;
}
#' Format a numeric vector as a collapsed JSON array string
#'
#' Resolves the `round` and `na` choices with [match.arg()] and hands
#' everything to the compiled routine `C_as_json_numeric_collapsed`.
#'
#' @param x Vector passed unchanged to the compiled routine.
#' @param digits Number of digits, passed unchanged to the compiled routine.
#' @param round Rounding mode: `"signif"` (the default) or `"decimal"`.
#' @param na Missing-value mode: `"string"` (the default) or `"null"`.
#' @return The value returned by the compiled routine.
#' @export
#' @useDynLib rspeed C_as_json_numeric_collapsed
as_json_numeric_collapsed <- function(x, digits = 5,
                                      round = c("signif", "decimal"),
                                      na = c("string", "null")) {
  # match.arg() picks the first choice when the argument is left at its
  # default, and errors on anything outside the listed choices.
  rounding_mode <- match.arg(round)
  na_mode <- match.arg(na)

  .Call(C_as_json_numeric_collapsed, x, digits, rounding_mode, na_mode)
}
# Demo transcript: lines starting with `#>` are the recorded console output
# of the call directly above them.
library(rspeed)
# Benchmark: format one million random normal values with jsonlite's internal
# asJSON() and with as_json_numeric_collapsed(), both using 3 digits.
x <- rnorm(1e6)
system.time(r1 <- jsonlite:::asJSON(x, digits=3))
#> user system elapsed
#> 0.906 0.014 0.935
system.time(r2 <- as_json_numeric_collapsed(x, 3))
#> user system elapsed
#> 0.256 0.002 0.258
# The two results are not identical, so the timings above compare
# implementations that produce different strings.
identical(r1, r2)
#> [1] FALSE
# Misc. formatting options
# A mix of large, tiny, zero, small positive/negative and non-finite values.
x <- c(-2934273e6, 8e-8, 0, 0.001, -0.001, 0.0001, -0.0001, NA, NaN, Inf, -Inf)
jsonlite:::asJSON(x, digits=3)
#> [1] "[-2.934273e+12,0,0,0.001,-0.001,0,0,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
# round = "decimal": values that round to zero at 3 decimals print as 0.
as_json_numeric_collapsed(x, 3, round = "decimal")
#> [1] "[-2934273000000,0,0,0.001,-0.001,0,0,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
# round = "signif": small values keep their magnitude.
as_json_numeric_collapsed(x, 3, round = "signif")
#> [1] "[-2.93e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
# round is left at its default here, which is "signif".
as_json_numeric_collapsed(x, 4)
#> [1] "[-2.934e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,\"NA\",\"NaN\",\"Inf\",\"-Inf\"]"
# na = "null": NA, NaN, Inf and -Inf are all written as null.
as_json_numeric_collapsed(x, 4, na = "null")
#> [1] "[-2.934e+12,8e-08,0,0.001,-0.001,0.0001,-0.0001,null,null,null,null]"
# Integer vectors
# NA_integer_ is reported the same way as a missing double.
as_json_numeric_collapsed(c(-2L, 0L, 1L, NA_integer_))
#> [1] "[-2,0,1,\"NA\"]"
as_json_numeric_collapsed(c(-2L, 0L, 1L, NA_integer_), na = "null")
#> [1] "[-2,0,1,null]"
# Empty vectors
# Zero-length input of either type gives an empty JSON array.
as_json_numeric_collapsed(numeric(0))
#> [1] "[]"
as_json_numeric_collapsed(integer(0))
#> [1] "[]"
@jeroen
Copy link

jeroen commented Sep 9, 2014

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment