Skip to content

Instantly share code, notes, and snippets.

@sxlijin
Last active April 25, 2024 06:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sxlijin/64dfc2d1d6c50537c6cf8f154ae43476 to your computer and use it in GitHub Desktop.
Save sxlijin/64dfc2d1d6c50537c6cf8f154ae43476 to your computer and use it in GitHub Desktop.
draft: ruby to json
use magnus::{
class, define_class,
encoding::{CType, RbEncoding},
exception::runtime_error,
function, method,
prelude::*,
scan_args::get_kwargs,
value::Value,
IntoValue, KwArgs, RArray, RHash, RObject, RString, Ruby,
};
use serde::{ser, Serialize};
/// Serde serializer state used to turn `Serialize` values into Ruby values.
///
/// NOTE(review): an earlier description said this works "using instance
/// variables"; nothing in the visible implementation sets instance variables
/// yet, so treat that as a design goal rather than current behaviour.
pub struct RubyObjectSerializer<'rb, T> {
// Handle to the Ruby VM; every Ruby value in this file is created through it.
ruby: &'rb Ruby,
// Payload carried by this serializer. `to_ruby` starts with a nil `Value`;
// the sequence and tuple serializers are declared with an `RArray` here.
r: T,
}
/// Error type used as `type Error` by every serializer impl in this file.
///
/// It has no fields, so it cannot carry a message or an underlying cause.
#[derive(Debug)]
pub struct Error {}
impl ser::Error for Error {
fn custom<T>(msg: T) -> Self
where
T: std::fmt::Display,
{
todo!()
}
}
impl std::fmt::Display for Error {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Error` has no fields, so a generic description is all that can be
        // reported. An empty string would make the failure invisible in logs
        // and in anything that prints the error.
        f.write_str("failed to serialize value into a Ruby object")
    }
}

// Marker impl so `Error` can be used wherever `std::error::Error` is expected.
impl std::error::Error for Error {}
/// Shorthand for a `std::result::Result` whose error is this file's `Error`.
type Result<T> = std::result::Result<T, Error>;
// By convention, the public API of a Serde serializer is one or more `to_abc`
// functions such as `to_string`, `to_bytes`, or `to_writer` depending on what
// Rust types the serializer is able to produce as output.
//
// This basic serializer supports only `to_string`.
pub fn to_ruby<T>(value: &T) -> Result<Value>
where
T: Serialize,
{
let ruby = match Ruby::get() {
Ok(ruby) => ruby,
Err(e) => return Err(Error {}),
};
let mut serializer = RubyObjectSerializer {
ruby: &ruby,
r: ruby.qnil().into_value(),
};
value.serialize(&mut serializer)
}
// Serde `Serializer` implementation that produces Ruby values.
//
// Scalar methods build the Ruby value directly through `self.ruby`. The
// compound methods (sequences, tuples, maps, structs, enum variants with
// payloads) are unfinished: most of them end in `todo!()` and panic if reached.
impl<'rb, T> ser::Serializer for &'rb mut RubyObjectSerializer<'rb, T>
where
T: magnus::IntoValue,
{
// The value produced by a successful serialization. Every method below
// returns the Ruby `Value` it built instead of appending to a shared output
// buffer.
type Ok = Value;
// The error type when some error occurs during serialization.
type Error = Error;
// Associated types that carry state while a compound value is being built.
// Sequences and tuples are declared to use a serializer whose payload is an
// `RArray`; the remaining compound kinds reuse `Self`.
type SerializeSeq = &'rb mut RubyObjectSerializer<'rb, RArray>;
type SerializeTuple = &'rb mut RubyObjectSerializer<'rb, RArray>;
type SerializeTupleStruct = Self;
type SerializeTupleVariant = Self;
type SerializeMap = Self;
type SerializeStruct = Self;
type SerializeStructVariant = Self;
// The simple methods. Each receives one primitive of the Serde data model and
// returns the Ruby value created for it through `self.ruby`.
//
// Booleans map to the values returned by `qtrue()` / `qfalse()`.
fn serialize_bool(self, v: bool) -> Result<Value> {
Ok(if v {
self.ruby.qtrue().into_value()
} else {
self.ruby.qfalse().into_value()
})
}
// The narrower signed integers are widened and delegated to `serialize_i64`.
fn serialize_i8(self, v: i8) -> Result<Value> {
self.serialize_i64(v.into())
}
fn serialize_i16(self, v: i16) -> Result<Value> {
self.serialize_i64(v.into())
}
fn serialize_i32(self, v: i32) -> Result<Value> {
self.serialize_i64(v.into())
}
// Signed integers are created with `integer_from_i64`.
fn serialize_i64(self, v: i64) -> Result<Value> {
Ok(self.ruby.integer_from_i64(v.into()).into_value())
}
// The narrower unsigned integers are widened and delegated to `serialize_u64`.
fn serialize_u8(self, v: u8) -> Result<Value> {
self.serialize_u64(v.into())
}
fn serialize_u16(self, v: u16) -> Result<Value> {
self.serialize_u64(v.into())
}
fn serialize_u32(self, v: u32) -> Result<Value> {
self.serialize_u64(v.into())
}
// Unsigned integers are created with `integer_from_u64`.
fn serialize_u64(self, v: u64) -> Result<Value> {
Ok(self.ruby.integer_from_u64(v.into()).into_value())
}
// `f32` is widened and delegated to `serialize_f64`.
fn serialize_f32(self, v: f32) -> Result<Value> {
self.serialize_f64(v.into())
}
// `r_float_from_f64` returns one of two representations (`Ok` or `Err`);
// both are converted to a `Value`, so neither arm is a failure here.
fn serialize_f64(self, v: f64) -> Result<Value> {
match self.ruby.r_float_from_f64(v) {
Ok(f) => Ok(f.into_value()),
Err(f) => Ok(f.into_value()),
}
}
// A char is handed to `str_from_char` (presumably yielding a one-character
// Ruby string — confirm against the magnus docs).
fn serialize_char(self, v: char) -> Result<Value> {
Ok(self.ruby.str_from_char(v).into_value())
}
// Strings are handed to `str_new` unchanged; no escaping is involved because
// the output is a Ruby value, not text.
fn serialize_str(self, v: &str) -> Result<Value> {
Ok(self.ruby.str_new(v).into_value())
}
// Byte slices are handed to `str_from_slice`, so they become a single
// string-like Ruby value rather than an array of integers.
// NOTE(review): confirm the resulting encoding is what callers expect.
fn serialize_bytes(self, v: &[u8]) -> Result<Value> {
Ok(self.ruby.str_from_slice(v).into_value())
}
// An absent optional is represented as Ruby `nil`.
fn serialize_none(self) -> Result<Value> {
Ok(self.ruby.qnil().into_value())
}
// A present optional is represented as just the contained value. This is
// lossy: `Some(())` and `None` both end up as `nil`, because the unit value
// is serialized through `serialize_none` as well.
fn serialize_some<F>(self, value: &F) -> Result<Value>
where
F: ?Sized + Serialize,
{
value.serialize(self)
}
// In Serde, unit means an anonymous value containing no data. It is mapped
// to `nil` by delegating to `serialize_none`.
fn serialize_unit(self) -> Result<Value> {
self.serialize_none()
}
// Unit struct means a named value containing no data. The name is ignored
// and the result is `nil`.
fn serialize_unit_struct(self, _name: &'static str) -> Result<Value> {
self.serialize_unit()
}
// NOTE(review): the variant name and index are ignored, so every unit
// variant of every enum serializes to `nil` and the variants cannot be told
// apart on the Ruby side. Serializing `_variant` as a string may be intended.
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
) -> Result<Value> {
self.serialize_unit()
}
// Newtype structs are treated as insignificant wrappers: only the wrapped
// value is serialized.
fn serialize_newtype_struct<NewType>(
self,
_name: &'static str,
value: &NewType,
) -> Result<Value>
where
NewType: ?Sized + Serialize,
{
value.serialize(self)
}
// Only the wrapped value is serialized here.
// NOTE(review): the variant name is dropped, so this is NOT an externally
// tagged representation; the Ruby side cannot tell which variant produced
// the value.
fn serialize_newtype_variant<NewType>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
value: &NewType,
) -> Result<Value>
where
NewType: ?Sized + Serialize,
{
value.serialize(&mut *self)
}
// Now we get to the serialization of compound types.
//
// Starting a sequence creates a new serializer whose payload is a Ruby array
// pre-sized with `ary_new_capa`; an unknown length falls back to capacity 0.
// NOTE(review): this returns `&mut` to a value constructed inside the
// function, which is expected to be rejected by the borrow checker. The
// array-backed serializer probably needs to be returned by value.
fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq> {
Ok(&mut RubyObjectSerializer {
ruby: &self.ruby,
r: self.ruby.ary_new_capa(len.unwrap_or(0)),
})
}
// Tuples are built exactly like sequences, with the capacity taken from the
// known length.
// NOTE(review): same reference-to-temporary problem as `serialize_seq`.
fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
Ok(&mut RubyObjectSerializer {
ruby: &self.ruby,
r: self.ruby.ary_new_capa(len),
})
}
// Unfinished: calls `serialize_seq`, discards its result, then panics via
// `todo!()`.
fn serialize_tuple_struct(
self,
_name: &'static str,
len: usize,
) -> Result<Self::SerializeTupleStruct> {
self.serialize_seq(Some(len));
todo!()
}
// Unfinished: serializes the variant name (the resulting value is dropped;
// only an error would be propagated by `?`), then panics via `todo!()`.
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
variant: &'static str,
_len: usize,
) -> Result<Self::SerializeTupleVariant> {
variant.serialize(&mut *self)?;
todo!()
}
// Unfinished: maps are not implemented and this panics via `todo!()`.
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
todo!()
}
// Unfinished: delegates to `serialize_map` (itself a `todo!()`), discards the
// result, then hits its own `todo!()`.
fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> {
self.serialize_map(Some(len));
todo!()
}
// Unfinished: serializes the variant name (the resulting value is dropped;
// only an error would be propagated by `?`), then panics via `todo!()`.
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
variant: &'static str,
_len: usize,
) -> Result<Self::SerializeStructVariant> {
variant.serialize(&mut *self)?;
todo!()
}
}
// The following 7 impls deal with the serialization of compound types like
// sequences and maps. Serialization of such types is begun by a Serializer
// method and followed by zero or more calls to serialize individual elements of
// the compound type and one call to end the compound type.
//
// This impl is SerializeSeq so these methods are called after `serialize_seq`
// is called on the Serializer.
impl<'rb> ser::SerializeSeq for &'rb mut RubyObjectSerializer<'rb, RArray> {
type Ok = Value;
type Error = Error;
// Serialize a single element of the sequence.
fn serialize_element<T>(&mut self, value: &T) -> Result<()>
where
T: ?Sized + Serialize,
{
value.serialize(&mut **self);
todo!()
}
// Close the sequence.
fn end(self) -> Result<Value> {
todo!()
}
}
// Same thing but for tuples.
impl<'rb> ser::SerializeTuple for &'rb mut RubyObjectSerializer<'rb, RArray> {
type Ok = Value;
type Error = Error;
fn serialize_element<T>(&mut self, value: &T) -> Result<()>
where
T: ?Sized + Serialize,
{
value.serialize(&mut **self);
todo!()
}
fn end(self) -> Result<Value> {
todo!()
}
}
// Tuple-struct state for the serializer. Both methods are still stubs and
// panic via `todo!()` when called.
impl<'rb, T> ser::SerializeTupleStruct for &'rb mut RubyObjectSerializer<'rb, T>
where
T: IntoValue,
{
type Ok = Value;
type Error = Error;
// Per the serde trait this receives one positional field; not implemented yet.
fn serialize_field<F>(&mut self, value: &F) -> Result<()>
where
F: ?Sized + Serialize,
{
todo!()
}
// Per the serde trait this finishes the tuple struct; not implemented yet.
fn end(self) -> Result<Value> {
todo!()
}
}
// Tuple-variant state for the serializer. `serialize_tuple_variant` currently
// serializes the variant name, drops the result and panics, so nothing is
// handed over to this impl yet. Both methods here are stubs that panic via
// `todo!()`.
//
// NOTE(review): whatever representation is chosen (for example a hash keyed by
// the variant name holding an array of fields), `end` is the place where the
// finished value has to be returned.
impl<'rb, T> ser::SerializeTupleVariant for &'rb mut RubyObjectSerializer<'rb, T>
where
T: IntoValue,
{
type Ok = Value;
type Error = Error;
// Per the serde trait this receives one positional field; not implemented yet.
fn serialize_field<F>(&mut self, value: &F) -> Result<()>
where
F: ?Sized + Serialize,
{
todo!()
}
// Per the serde trait this finishes the variant; not implemented yet.
fn end(self) -> Result<Value> {
todo!()
}
}
// Some `Serialize` types are not able to hold a key and value in memory at the
// same time, so `SerializeMap` implementations are required to support
// `serialize_key` and `serialize_value` individually.
//
// There is a third, optional method on the `SerializeMap` trait:
// `serialize_entry` lets a serializer optimize for the case where key and
// value are both available at once. It is not overridden here, so serde's
// default is used.
//
// All three methods below are still stubs and panic via `todo!()`.
impl<'rb, T> ser::SerializeMap for &'rb mut RubyObjectSerializer<'rb, T>
where
T: IntoValue,
{
type Ok = Value;
type Error = Error;
// The Serde data model allows map keys to be any serializable type.
// NOTE(review): decide whether arbitrary Ruby values are acceptable as keys
// or whether keys should be validated (e.g. strings only) once this is
// implemented. Not implemented yet.
fn serialize_key<F>(&mut self, key: &F) -> Result<()>
where
F: ?Sized + Serialize,
{
todo!()
}
// Per the serde trait this receives the value for the preceding key; not
// implemented yet.
fn serialize_value<F>(&mut self, value: &F) -> Result<()>
where
F: ?Sized + Serialize,
{
todo!()
}
// Per the serde trait this finishes the map; not implemented yet.
fn end(self) -> Result<Value> {
todo!()
}
}
// Structs are like maps in which the keys are constrained to be compile-time
// constant strings. Both methods below are still stubs and panic via
// `todo!()`.
impl<'rb, T> ser::SerializeStruct for &'rb mut RubyObjectSerializer<'rb, T>
where
T: magnus::IntoValue,
{
type Ok = Value;
type Error = Error;
// Per the serde trait this receives one named field; not implemented yet.
fn serialize_field<F>(&mut self, key: &'static str, value: &F) -> Result<()>
where
F: ?Sized + Serialize,
{
todo!()
}
// Per the serde trait this finishes the struct; not implemented yet.
fn end(self) -> Result<Value> {
todo!()
}
}
// Struct-variant state for the serializer. `serialize_struct_variant`
// currently serializes the variant name, drops the result and panics, so
// nothing is handed over to this impl yet. Both methods below are stubs that
// panic via `todo!()`.
impl<'rb, T> ser::SerializeStructVariant for &'rb mut RubyObjectSerializer<'rb, T>
where
T: magnus::IntoValue,
{
type Ok = Value;
type Error = Error;
// Per the serde trait this receives one named field; not implemented yet.
fn serialize_field<F>(&mut self, key: &'static str, value: &F) -> Result<()>
where
F: ?Sized + Serialize,
{
todo!()
}
// Per the serde trait this finishes the variant; not implemented yet.
fn end(self) -> Result<Value> {
todo!()
}
}
fn ruby_to_json(
any: Value,
field_pos: Vec<String>,
) -> std::result::Result<serde_json::Value, Vec<SerializationError>> {
use magnus::r_hash::ForEach;
if let Some(any) = RHash::from_value(any) {
let mut errs = vec![];
let mut map = serde_json::Map::new();
if any
.foreach(|k: Value, v: Value| {
let Some(k) = RString::from_value(k) else {
errs.push(SerializationError {
position: field_pos.clone(),
message: format!(
"expected every key in this hash to be string, but found key {:#}",
k
),
});
return Ok(ForEach::Continue);
};
let Ok(k) = k.to_string() else {
errs.push(SerializationError {
position: field_pos.clone(),
message: format!(
"failed to convert key in this hash to be UTF-8 string: {:#}",
k
),
});
return Ok(ForEach::Continue);
};
let mut field_pos = field_pos.clone();
field_pos.push(k.clone());
match ruby_to_json(v, field_pos) {
Ok(json_value) => {
map.insert(k.to_string(), json_value);
}
Err(e) => errs.extend(e),
}
Ok(ForEach::Continue)
})
.is_err()
{
errs.push(SerializationError {
position: field_pos.clone(),
message: "failed to iterate over hash".to_string(),
});
};
if !errs.is_empty() {
return Err(errs);
}
return Ok(serde_json::Value::Object(map));
}
if let Some(any) = RArray::from_value(any) {
let mut errs = vec![];
let mut arr = vec![];
for (i, value) in any.each().enumerate() {
let mut field_pos = field_pos.clone();
field_pos.push(i.to_string());
let Ok(value) = value else {
errs.push(SerializationError {
position: field_pos.clone(),
message: format!("failed to enumerate array element at index {}", i),
});
continue;
};
match ruby_to_json(value, field_pos) {
Ok(json_value) => {
arr.push(json_value);
}
Err(e) => errs.extend(e),
}
}
if !errs.is_empty() {
return Err(errs);
}
return Ok(serde_json::Value::Array(arr));
}
if let Some(any) = magnus::Integer::from_value(any) {
if let Ok(any) = any.to_i64() {
return Ok(serde_json::Value::Number(serde_json::Number::from(any)));
}
if let Ok(any) = any.to_u64() {
return Ok(serde_json::Value::Number(serde_json::Number::from(any)));
}
return Err(vec![SerializationError {
position: field_pos,
message: "failed to convert integer to i64 or u64".to_string(),
}]);
}
if let Some(any) = magnus::Float::from_value(any) {
let Some(as_json) = serde_json::Number::from_f64(any.to_f64()) else {
return Err(vec![SerializationError {
position: field_pos,
message: format!("cannot convert {:#} to float", any),
}]);
};
return Ok(serde_json::Value::Number(as_json));
}
//if let Some(any) = RString::from_value(ruby) {
// match any.to_string() {}
// return Ok(serde_json::Value::String(any.to_string()));
//}
Err(vec![SerializationError {
position: field_pos,
message: "the rest of the cases are unsupported".to_string(),
}])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment