Skip to content

Instantly share code, notes, and snippets.

@knapply
Last active May 17, 2020 23:24
Show Gist options
  • Save knapply/0cfda08e85ba3fa4f7e61071f83d4768 to your computer and use it in GitHub Desktop.
Save knapply/0cfda08e85ba3fa4f7e61071f83d4768 to your computer and use it in GitHub Desktop.
simdjson via Rcpp
// SIMDJSON_VERSION == 0.3.1
#include <Rcpp.h>
#include <simdjson/simdjson.h>
#include <simdjson/simdjson.cpp>
namespace Rcpp {

/// Specialization of Rcpp::wrap for `int64_t`.
///
/// Produces a length-one numeric vector whose single slot holds the raw bytes
/// of `obj` (a bit-for-bit copy, not a numeric conversion) and whose "class"
/// attribute is set to "integer64" (presumably the class used by the bit64
/// package -- confirm on the R side).
///
/// @param obj  64-bit signed integer to hand over to R.
/// @return     The tagged numeric vector.
template <>
inline SEXP wrap<int64_t>(const int64_t& obj) {
  // The byte copy below is only meaningful when both types have the same width.
  static_assert(sizeof(double) == sizeof(int64_t),
                "integer64 storage requires double and int64_t to have the same size");

  Rcpp::NumericVector out(1);
  std::memcpy(&(out[0]), &obj, sizeof(obj));
  out.attr("class") = "integer64";
  return out;
}

}  // namespace Rcpp
namespace simdjsonr {
/// Reports whether an integer value falls outside the range that can be stored
/// in an R integer, i.e. outside [-INT_MAX, INT_MAX].
///
/// INT_MIN counts as out of range because R reserves that bit pattern for
/// NA_integer_. INT_MAX itself is a valid R integer and is therefore reported
/// as in range.
///
/// Only the `uint64_t` and `int64_t` specializations below are defined.
///
/// @param x  Value to classify.
/// @return   true when `x` needs a wider representation than a 32-bit int.
template <typename int_T>
inline constexpr bool is_really_int64_t(int_T);

/// Unsigned case: only the upper bound can be exceeded.
template <>
inline constexpr bool is_really_int64_t<uint64_t>(uint64_t x) {
  return x > static_cast<uint64_t>(INT_MAX);
}

/// Signed case: above INT_MAX, or at/below INT_MIN (the NA sentinel).
template <>
inline constexpr bool is_really_int64_t<int64_t>(int64_t x) {
  return x > INT_MAX || x <= INT_MIN;
}
/// Converts a parsed JSON integer into an R value, choosing the representation
/// from the value's magnitude and the compile-time flags.
///
/// Values for which `is_really_int64_t` returns false are wrapped as `int`.
/// Otherwise the value is wrapped as `int64_t` when `bit64_integer64` is set,
/// as its decimal string when `int_64_strings` is set, and as `double` when
/// neither flag is set. `bit64_integer64` takes precedence over
/// `int_64_strings`.
///
/// NOTE(review): with `int_T == uint64_t` and `bit64_integer64` set, the value
/// is converted to `int64_t` at the call to `Rcpp::wrap<int64_t>`; inputs above
/// INT64_MAX do not fit that type -- confirm this is acceptable.
///
/// @param x  Integer extracted from the JSON document.
/// @return   The wrapped R value.
template <typename int_T, bool bit64_integer64, bool int_64_strings>
inline constexpr SEXP resolve_int(int_T x) {
  // Small values always become plain integers, whatever the flags say.
  if (!is_really_int64_t<int_T>(x)) {
    return Rcpp::wrap<int>(x);
  }

  if constexpr (bit64_integer64) {
    return Rcpp::wrap<int64_t>(x);
  } else if constexpr (int_64_strings) {
    return Rcpp::wrap(std::to_string(x));
  } else {
    return Rcpp::wrap<double>(x);
  }
}
/// Converts a JSON object into a named list.
///
/// Every value is converted by calling `f` on it; every key becomes the name
/// of the corresponding list element. Elements appear in iteration order of
/// `object`.
///
/// @param object  JSON object to convert.
/// @param f       Callable applied to each value; its result is stored in the list.
/// @return        The list, with its "names" attribute set to the keys.
template <typename F>
inline SEXP build_object(dom::object&& object, F f) {
  const R_xlen_t n_fields = std::size(object);
  Rcpp::CharacterVector field_names(n_fields);
  Rcpp::List values(n_fields);

  R_xlen_t pos = 0;
  for (auto [key, val] : object) {
    field_names[pos] = std::string(key);
    values[pos] = f(val);
    ++pos;
  }

  values.attr("names") = field_names;
  return values;
}
/// Converts a JSON array into an unnamed list.
///
/// Every element is converted by calling `f` on it; results are stored in
/// iteration order of `object`.
///
/// The list is allocated once at its final length, matching `build_object`.
/// Growing an Rcpp vector with `push_back` copies the whole vector on every
/// call, which made the previous implementation quadratic in the array length.
///
/// @param object  JSON array to convert.
/// @param f       Callable applied to each element; its result is stored in the list.
/// @return        The populated list.
template <typename F>
inline auto build_array(dom::array&& object, F f) {
  const R_xlen_t n = std::size(object);
  Rcpp::List out(n);
  R_xlen_t i = 0;
  for (dom::element child : object) {
    out[i] = f(child);
    ++i;
  }
  return out;
}
/// Recursively converts a parsed JSON element into an R value.
///
/// Arrays and objects are converted through `build_array` / `build_object`,
/// which call back into this same instantiation for each child. Integers go
/// through `resolve_int` with the same flags. Doubles, strings and booleans
/// are wrapped directly. JSON null and any unrecognised element type yield
/// `R_NilValue`.
///
/// @tparam bit64_integer64  Forwarded to `resolve_int`.
/// @tparam int_64_strings   Forwarded to `resolve_int`.
/// @param element           Element to convert.
/// @return                  The converted R value.
template <bool bit64_integer64, bool int_64_strings>
SEXP dump_json(dom::element element) {
  // Children of containers are converted with the same integer-handling flags.
  constexpr auto recurse = dump_json<bit64_integer64, int_64_strings>;

  switch (element.type()) {
    // Containers
    case dom::element_type::OBJECT:
      return build_object(element, recurse);
    case dom::element_type::ARRAY:
      return build_array(element, recurse);

    // Integers
    case dom::element_type::UINT64:
      return resolve_int<uint64_t, bit64_integer64, int_64_strings>(element);
    case dom::element_type::INT64:
      return resolve_int<int64_t, bit64_integer64, int_64_strings>(element);

    // Remaining scalars
    case dom::element_type::BOOL:
      return Rcpp::wrap<bool>(element);
    case dom::element_type::STRING:
      return Rcpp::wrap(std::string(element));
    case dom::element_type::DOUBLE:
      return Rcpp::wrap<double>(element);

    // Null and anything unexpected
    case dom::element_type::NULL_VALUE:
    default:
      return R_NilValue;
  }
}
/// Selects the element that should be converted for one parsed document.
///
/// When `use_json_pointer` is true the result of `element.at(json_pointer)` is
/// returned; otherwise `element` is returned unchanged and `json_pointer` is
/// ignored.
///
/// NOTE(review): `at()` presumably returns a result wrapper that converts to
/// `element` and raises if the lookup failed -- confirm against the simdjson
/// version in use.
///
/// @param element       Root element of the parsed document.
/// @param json_pointer  Pointer passed to `element.at()` when enabled.
/// @return              The element to convert.
template <bool use_json_pointer>
inline constexpr simdjson::dom::element stage_element(simdjson::dom::element element,
                                                      const std::string_view& json_pointer) {
  if constexpr (use_json_pointer) {
    return element.at(json_pointer);
  } else {
    return element;
  }
}
/// Reports a failed parse, either through `Rcpp::warning` or `Rcpp::stop`.
///
/// `msg` is passed as an argument to a fixed "%s" format rather than as the
/// format string itself, so a message containing '%' is reported verbatim
/// instead of being interpreted as conversion specifiers.
///
/// @tparam warning  true: call `Rcpp::warning`; false: call `Rcpp::stop`.
/// @param msg       Text to report.
template <bool warning>
inline constexpr void throw_bad_parse(const char* msg) {
  if constexpr (warning) {
    Rcpp::warning("%s", msg);
  } else {
    Rcpp::stop("%s", msg);
  }
}
/// Parses every string in `json` and converts each document to an R value.
///
/// A single parser instance is reused for all inputs. For each input that
/// parses successfully, the element chosen by `stage_element` is converted
/// with `dump_json` and stored at the matching position of the result. For
/// each input that fails to parse, `throw_bad_parse<warning>` is called with
/// "parse error" and, if that call returns, the slot keeps its initial value
/// (presumably NULL).
///
/// @tparam warning           Forwarded to `throw_bad_parse`.
/// @tparam use_json_pointer  Forwarded to `stage_element`.
/// @tparam bit64_integer64   Forwarded to `dump_json`.
/// @tparam int_64_strings    Forwarded to `dump_json`.
/// @param json               JSON documents, one per element.
/// @param json_pointer       Pointer forwarded to `stage_element`.
/// @return                   List with one entry per element of `json`.
template <bool warning, bool use_json_pointer, bool bit64_integer64, bool int_64_strings>
SEXP parse_json(const Rcpp::CharacterVector& json, const std::string_view& json_pointer) {
  const R_xlen_t n_docs = std::size(json);
  Rcpp::List results(n_docs);
  simdjson::dom::parser parser;

  for (R_xlen_t idx = 0; idx < n_docs; ++idx) {
    auto [doc, failure] = parser.parse(std::string_view(json[idx]));
    if (!failure) {
      results[idx] = dump_json<bit64_integer64, int_64_strings>(
          stage_element<use_json_pointer>(doc, json_pointer));
    } else {
      // Only returns in warning mode; the slot for this input is left untouched.
      throw_bad_parse<warning>("parse error");
    }
  }

  return results;
}
// Named instantiations of parse_json, one per combination used by the exported
// entry point. Template arguments are, in order:
//   <warning, use_json_pointer, bit64_integer64, int_64_strings>
// Naming: "pointer" => use_json_pointer is true; "integer64" / "string" /
// "double" => which of the two integer flags (if any) is set; "stop" /
// "warning" => value of the `warning` flag (false / true).
//
// warning == false, no JSON pointer.
inline constexpr auto parse_int64_as_integer64_stop = parse_json<false, false, true, false>;
inline constexpr auto parse_int64_as_string_stop = parse_json<false, false, false, true>;
inline constexpr auto parse_int64_as_double_stop = parse_json<false, false, false, false>;
// warning == false, with JSON pointer.
inline constexpr auto parse_pointer_int64_as_integer64_stop = parse_json<false, true, true, false>;
inline constexpr auto parse_pointer_int64_as_string_stop = parse_json<false, true, false, true>;
inline constexpr auto parse_pointer_int64_as_double_stop = parse_json<false, true, false, false>;
// warning == true, no JSON pointer.
inline constexpr auto parse_int64_as_integer64_warning = parse_json<true, false, true, false>;
inline constexpr auto parse_int64_as_string_warning = parse_json<true, false, false, true>;
inline constexpr auto parse_int64_as_double_warning = parse_json<true, false, false, false>;
// warning == true, with JSON pointer.
inline constexpr auto parse_pointer_int64_as_integer64_warning = parse_json<true, true, true, false>;
inline constexpr auto parse_pointer_int64_as_string_warning = parse_json<true, true, false, true>;
inline constexpr auto parse_pointer_int64_as_double_warning = parse_json<true, true, false, false>;
} // namespace simdjsonr
//
//
// Entry point exported to R: picks the parse_json instantiation that matches
// the runtime flags and calls it.
//
// json                JSON documents, one per element.
// json_pointer        JSON pointer applied to every document; an empty string
//                     selects the variants that do no pointer lookup.
// bit64_integer64     Selects the "integer64" variants. Checked first, so it
//                     takes precedence over int_64_strings.
// int_64_strings      Selects the "string" variants when bit64_integer64 is
//                     false. With both flags false the "double" variants run.
// error_on_bad_parse  true selects the "_stop" variants, false the "_warning"
//                     variants.
//
// Returns whatever the selected parse_json instantiation returns.
// [[Rcpp::export(.parse_json_impl)]]
SEXP parse_json_impl(const Rcpp::CharacterVector& json,
                     const std::string& json_pointer,
                     const bool bit64_integer64,
                     const bool int_64_strings,
                     const bool error_on_bad_parse) {
  using namespace simdjsonr;
  using parse_fn = decltype(parse_int64_as_double_stop);

  // Indexed as [failure mode][integer policy][pointer usage].
  static constexpr parse_fn dispatch[2][3][2] = {
      {
          // error_on_bad_parse == true
          {parse_int64_as_integer64_stop, parse_pointer_int64_as_integer64_stop},
          {parse_int64_as_string_stop, parse_pointer_int64_as_string_stop},
          {parse_int64_as_double_stop, parse_pointer_int64_as_double_stop},
      },
      {
          // error_on_bad_parse == false
          {parse_int64_as_integer64_warning, parse_pointer_int64_as_integer64_warning},
          {parse_int64_as_string_warning, parse_pointer_int64_as_string_warning},
          {parse_int64_as_double_warning, parse_pointer_int64_as_double_warning},
      },
  };

  const int failure_mode = error_on_bad_parse ? 0 : 1;
  const int int_policy = bit64_integer64 ? 0 : (int_64_strings ? 1 : 2);
  const int pointer_usage = json_pointer.empty() ? 0 : 1;

  return dispatch[failure_mode][int_policy][pointer_usage](json, json_pointer);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment