Last active
May 17, 2020 23:24
-
-
Save knapply/0cfda08e85ba3fa4f7e61071f83d4768 to your computer and use it in GitHub Desktop.
simdjson via Rcpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SIMDJSON_VERSION == 0.3.1 | |
#include <Rcpp.h> | |
#include <simdjson/simdjson.h> | |
#include <simdjson/simdjson.cpp> | |
namespace Rcpp { | |
template <> | |
inline SEXP wrap<int64_t>(const int64_t& obj) { | |
auto out = Rcpp::NumericVector(1); | |
std::memcpy(&(out[0]), &(obj), sizeof(double)); | |
out.attr("class") = "integer64"; | |
return out; | |
} | |
} // namespace Rcpp | |
namespace simdjsonr { | |
/// Reports whether an integer value is too wide to be returned as an R
/// `integer` (a C `int`), i.e. whether it needs a 64-bit representation.
///
/// Only the `uint64_t` and `int64_t` specializations below are defined.
///
/// @param x  value to classify.
/// @return   true when `x` cannot be stored as an R integer.
template <typename int_T>
inline constexpr bool is_really_int64_t(int_T);

/// Unsigned case: anything strictly above INT_MAX is too wide.
/// INT_MAX itself is a valid R integer, so the bound is exclusive.
template <>
inline constexpr bool is_really_int64_t<uint64_t>(uint64_t x) {
  return x > static_cast<uint64_t>(INT_MAX);
}

/// Signed case: too wide when above INT_MAX or at/below INT_MIN.
/// INT_MIN is excluded from the "fits" range because R uses that bit pattern
/// for NA_integer_, so it cannot be returned as an ordinary integer.
template <>
inline constexpr bool is_really_int64_t<int64_t>(int64_t x) {
  return x > INT_MAX || x <= INT_MIN;
}
template <typename int_T, bool bit64_integer64, bool int_64_strings> | |
inline constexpr SEXP resolve_int(int_T x) { | |
return is_really_int64_t<int_T>(x) | |
? (bit64_integer64 ? Rcpp::wrap<int64_t>(x) | |
: int_64_strings ? Rcpp::wrap(std::to_string(x)) : Rcpp::wrap<double>(x)) | |
: Rcpp::wrap<int>(x); | |
} | |
template <typename F> | |
inline SEXP build_object(dom::object&& object, F f) { | |
const R_xlen_t n = std::size(object); | |
Rcpp::List out(n); | |
Rcpp::CharacterVector out_names(n); | |
R_xlen_t i = 0; | |
for (auto [key, val] : object) { | |
out[i] = f(val); | |
out_names[i] = std::string(key); | |
i++; | |
} | |
out.attr("names") = out_names; | |
return out; | |
} | |
template <typename F> | |
inline auto build_array(dom::array&& object, F f) { | |
Rcpp::List out; | |
for (dom::element child : object) { | |
out.push_back(f(child)); | |
} | |
return out; | |
} | |
template <bool bit64_integer64, bool int_64_strings> | |
SEXP dump_json(dom::element element) { | |
switch (element.type()) { | |
case dom::element_type::ARRAY: | |
return build_array(element, dump_json<bit64_integer64, int_64_strings>); | |
case dom::element_type::OBJECT: | |
return build_object(element, dump_json<bit64_integer64, int_64_strings>); | |
case dom::element_type::INT64: | |
return resolve_int<int64_t, bit64_integer64, int_64_strings>(element); | |
case dom::element_type::UINT64: | |
return resolve_int<uint64_t, bit64_integer64, int_64_strings>(element); | |
case dom::element_type::DOUBLE: | |
return Rcpp::wrap<double>(element); | |
case dom::element_type::STRING: | |
return Rcpp::wrap(std::string(element)); | |
case dom::element_type::BOOL: | |
return Rcpp::wrap<bool>(element); | |
case dom::element_type::NULL_VALUE: | |
[[fallthrough]]; | |
default: | |
return R_NilValue; | |
} | |
} | |
template <bool use_json_pointer> | |
inline constexpr simdjson::dom::element stage_element(simdjson::dom::element element, | |
const std::string_view& json_pointer) { | |
return use_json_pointer ? element.at(json_pointer) : element; | |
} | |
template <bool warning> | |
inline constexpr void throw_bad_parse(const char* msg) { | |
warning ? Rcpp::warning(msg) : Rcpp::stop(msg); | |
} | |
template <bool warning, bool use_json_pointer, bool bit64_integer64, bool int_64_strings> | |
SEXP parse_json(const Rcpp::CharacterVector& json, const std::string_view& json_pointer) { | |
const R_xlen_t n = std::size(json); | |
Rcpp::List out(n); | |
simdjson::dom::parser parser; | |
for (R_xlen_t i = 0; i < n; ++i) { | |
auto [res, error] = parser.parse(std::string_view(json[i])); | |
if (error) { | |
throw_bad_parse<warning>("parse error"); | |
continue; | |
} | |
out[i] = dump_json<bit64_integer64, int_64_strings>(stage_element<use_json_pointer>(res, json_pointer)); | |
} | |
return out; | |
} | |
// Named instantiations of parse_json.
// Template argument order: <warning, use_json_pointer, bit64_integer64, int_64_strings>.

// --- parse failures raise an error (warning == false) ---------------------
// Whole document:
inline constexpr auto parse_int64_as_integer64_stop = parse_json<false, false, true, false>;
inline constexpr auto parse_int64_as_string_stop = parse_json<false, false, false, true>;
inline constexpr auto parse_int64_as_double_stop = parse_json<false, false, false, false>;
// Via JSON pointer:
inline constexpr auto parse_pointer_int64_as_integer64_stop = parse_json<false, true, true, false>;
inline constexpr auto parse_pointer_int64_as_string_stop = parse_json<false, true, false, true>;
inline constexpr auto parse_pointer_int64_as_double_stop = parse_json<false, true, false, false>;

// --- parse failures emit a warning (warning == true) ----------------------
// Whole document:
inline constexpr auto parse_int64_as_integer64_warning = parse_json<true, false, true, false>;
inline constexpr auto parse_int64_as_string_warning = parse_json<true, false, false, true>;
inline constexpr auto parse_int64_as_double_warning = parse_json<true, false, false, false>;
// Via JSON pointer:
inline constexpr auto parse_pointer_int64_as_integer64_warning = parse_json<true, true, true, false>;
inline constexpr auto parse_pointer_int64_as_string_warning = parse_json<true, true, false, true>;
inline constexpr auto parse_pointer_int64_as_double_warning = parse_json<true, true, false, false>;
} // namespace simdjsonr | |
// | |
// | |
// Exported entry point: maps the run-time options onto the matching
// compile-time instantiation of simdjsonr::parse_json and calls it.
//
// json               character vector of JSON documents.
// json_pointer       JSON pointer; an empty string selects the whole document.
// bit64_integer64    return wide integers as integer64; takes precedence
//                    over int_64_strings when both are true.
// int_64_strings     return wide integers as strings.
// error_on_bad_parse true selects the *_stop variants, false the *_warning
//                    variants.
//
// [[Rcpp::export(.parse_json_impl)]]
SEXP parse_json_impl(const Rcpp::CharacterVector& json,
                     const std::string& json_pointer,
                     const bool bit64_integer64,
                     const bool int_64_strings,
                     const bool error_on_bad_parse) {
  using namespace simdjsonr;

  const bool has_pointer = !json_pointer.empty();

  // Wide integers as integer64.
  if (bit64_integer64) {
    if (error_on_bad_parse) {
      return has_pointer ? parse_pointer_int64_as_integer64_stop(json, json_pointer)
                         : parse_int64_as_integer64_stop(json, json_pointer);
    }
    return has_pointer ? parse_pointer_int64_as_integer64_warning(json, json_pointer)
                       : parse_int64_as_integer64_warning(json, json_pointer);
  }

  // Wide integers as strings.
  if (int_64_strings) {
    if (error_on_bad_parse) {
      return has_pointer ? parse_pointer_int64_as_string_stop(json, json_pointer)
                         : parse_int64_as_string_stop(json, json_pointer);
    }
    return has_pointer ? parse_pointer_int64_as_string_warning(json, json_pointer)
                       : parse_int64_as_string_warning(json, json_pointer);
  }

  // Wide integers as doubles.
  if (error_on_bad_parse) {
    return has_pointer ? parse_pointer_int64_as_double_stop(json, json_pointer)
                       : parse_int64_as_double_stop(json, json_pointer);
  }
  return has_pointer ? parse_pointer_int64_as_double_warning(json, json_pointer)
                     : parse_int64_as_double_warning(json, json_pointer);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment