Last active
October 24, 2019 20:55
-
-
Save jcdyer/621c47e1942b20cc8a827246d0ba7e29 to your computer and use it in GitHub Desktop.
rawbson prototype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::borrow::Cow; | |
use std::convert::TryInto; | |
/// The BSON element types this prototype knows how to read.
///
/// Each variant corresponds to the one-byte type tag that precedes an
/// element's key inside a BSON document.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BsonType {
    /// Type tag `2`.
    String,
    /// Type tag `3`.
    Document,
    /// Type tag `4`.
    Array,
}

impl BsonType {
    /// Maps an element type tag to the matching [`BsonType`].
    ///
    /// Returns `None` for any tag other than `2`, `3` or `4`.
    fn from_byte(byte: u8) -> Option<BsonType> {
        match byte {
            2 => Some(BsonType::String),
            3 => Some(BsonType::Document),
            4 => Some(BsonType::Array),
            _ => None,
        }
    }
}
pub struct RawBsonDoc<'a> { | |
data: &'a [u8], | |
} | |
impl<'a> RawBsonDoc<'a> { | |
pub fn new(data: &'a [u8]) -> RawBsonDoc<'a> { | |
let length = i32_from_slice(&data[..4]); | |
assert_eq!(data.len() as i32, length); // Length is properly specified | |
assert_eq!(*data.iter().last().unwrap(), 0); // Document is null terminated | |
RawBsonDoc { data } | |
} | |
pub fn get(&self, key: &str) -> Option<RawBson<'a>> { | |
let mut offset = 4; | |
while offset < self.data.len() - 1 { | |
let thiskey = { | |
let mut splits = self.data[offset + 1..].splitn(2, |x| *x == 0); | |
splits.next()? | |
}; | |
let bsontype = BsonType::from_byte(self.data[offset])?; | |
let valueoffset = offset + 1 + thiskey.len() + 1; | |
let newoffset = valueoffset + match &bsontype { | |
BsonType::String => { | |
4 + i32_from_slice(&self.data[valueoffset..valueoffset + 4]) | |
} | |
BsonType::Document => { | |
i32_from_slice(&self.data[valueoffset..valueoffset + 4]) | |
} | |
BsonType::Array => { | |
i32_from_slice(&self.data[valueoffset..valueoffset + 4]) | |
} | |
} as usize; | |
if thiskey == key.as_bytes() { | |
return Some(RawBson::new(bsontype, &self.data[valueoffset..newoffset])); | |
} | |
offset = newoffset; | |
} | |
None | |
} | |
} | |
pub struct RawBsonArray<'a> { | |
doc: RawBsonDoc<'a>, | |
} | |
impl<'a> RawBsonArray<'a> { | |
pub fn new(data: &'a [u8]) -> RawBsonArray<'a> { | |
let doc = RawBsonDoc::new(data); | |
RawBsonArray{ doc } | |
} | |
pub fn get(&self, index: usize) -> Option<RawBson<'a>> { | |
let key = key_from_index(index); | |
self.doc.get(&key) | |
} | |
} | |
pub struct RawBson<'a> { | |
bsontype: BsonType, | |
data: &'a [u8], | |
} | |
impl<'a> RawBson<'a> { | |
// This is not public. A RawBson object can only be created by the .get() method | |
// on RawBsonDoc (or RawBsonArray?) | |
// | |
fn new(bsontype: BsonType, data: &'a [u8]) -> RawBson<'a> { | |
RawBson { bsontype, data } | |
} | |
pub fn as_doc(&self) -> Option<RawBsonDoc<'a>> { | |
if let BsonType::Document = self.bsontype { | |
Some(RawBsonDoc::new(self.data)) | |
} else { | |
None | |
} | |
} | |
pub fn as_array(&self) -> Option<RawBsonArray<'a>> { | |
if let BsonType::Array = self.bsontype { | |
Some(RawBsonArray::new(self.data)) | |
} else { | |
None | |
} | |
} | |
pub fn as_str(&self) -> Option<&str> { | |
if let BsonType::String = self.bsontype { | |
let length = i32_from_slice(&self.data[..4]); | |
assert_eq!(self.data.len() as i32, length + 4); | |
Some(std::str::from_utf8(&self.data[4..4 + length as usize - 1]).ok()?) | |
} else { | |
None | |
} | |
} | |
} | |
/// Returns the decimal string used as the document key for array position
/// `index`.
///
/// Indexes `0..=15` are served from a static table so that short arrays need
/// no allocation; larger indexes are formatted into an owned `String`.
fn key_from_index(index: usize) -> Cow<'static, str> {
    static SMALL_KEYS: [&str; 16] = [
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
    ];

    match SMALL_KEYS.get(index) {
        Some(&key) => Cow::Borrowed(key),
        None => Cow::Owned(index.to_string()),
    }
}
/// Decodes a 4-byte slice as a little-endian `i32`.
///
/// # Panics
///
/// Panics if `val` is not exactly four bytes long.
fn i32_from_slice(val: &[u8]) -> i32 {
    let bytes: [u8; 4] = val.try_into().expect("i32 is four bytes");
    i32::from_le_bytes(bytes)
}
#[cfg(test)]
mod tests {
    use super::*;
    use bson::{bson, doc, encode_document};

    /// Encodes `doc` into a fresh byte buffer with the `bson` crate.
    fn to_bytes(doc: &::bson::Document) -> Vec<u8> {
        let mut docbytes = Vec::new();
        encode_document(&mut docbytes, doc).unwrap();
        docbytes
    }

    #[test]
    fn string_from_document() {
        let docbytes = to_bytes(&doc! {
            "this": "first",
            "that": "second",
            "something": "else",
        });
        let rawdoc = RawBsonDoc::new(&docbytes);
        assert_eq!(rawdoc.get("that").unwrap().as_str().unwrap(), "second");
    }

    #[test]
    fn nested_document() {
        let docbytes = to_bytes(&doc! {
            "outer": {
                "inner": "surprise",
            },
        });
        let rawdoc = RawBsonDoc::new(&docbytes);
        assert_eq!(
            rawdoc
                .get("outer")
                .expect("get doc")
                .as_doc()
                .expect("as doc")
                .get("inner")
                .expect("get str")
                .as_str()
                .expect("as str"),
            "surprise",
        );
    }

    #[test]
    fn array() {
        let docbytes = to_bytes(&doc! {
            "array": [
                "cats",
                "dogs",
                "rhinoceroses",
            ],
        });
        let rawdoc = RawBsonDoc::new(&docbytes);
        assert_eq!(
            rawdoc
                .get("array")
                .expect("get array")
                .as_array()
                .expect("as array")
                .get(2)
                .expect("get str")
                .as_str()
                .expect("as str"),
            "rhinoceroses",
        );
    }

    #[test]
    fn missing_key_is_none() {
        let docbytes = to_bytes(&doc! {
            "this": "first",
        });
        let rawdoc = RawBsonDoc::new(&docbytes);
        assert!(rawdoc.get("absent").is_none());
    }

    #[test]
    fn array_index_out_of_range_is_none() {
        let docbytes = to_bytes(&doc! {
            "array": [
                "cats",
                "dogs",
            ],
        });
        let rawdoc = RawBsonDoc::new(&docbytes);
        let array = rawdoc
            .get("array")
            .expect("get array")
            .as_array()
            .expect("as array");
        assert!(array.get(2).is_none());
    }

    #[test]
    fn wrong_type_accessors_are_none() {
        let docbytes = to_bytes(&doc! {
            "this": "first",
        });
        let rawdoc = RawBsonDoc::new(&docbytes);
        let value = rawdoc.get("this").expect("get str");
        assert!(value.as_doc().is_none());
        assert!(value.as_array().is_none());
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use serde::Deserialize;
use serde::de::{
    self, Deserializer, Visitor,
};

use super::{BsonType, RawBsonDoc};
/// Error returned when BSON deserialization fails.
///
/// It carries no detail about the failure.
///
/// NOTE(review): to be usable as `serde::de::Deserializer::Error` this type
/// must also implement `serde::de::Error`; that impl is not provided here.
#[derive(Debug)]
pub struct Error;

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("failed to deserialize BSON")
    }
}

impl std::error::Error for Error {}
pub struct BsonDeserializer<'de> { | |
doc: RawBsonDoc<'de>, | |
offset: usize, | |
} | |
impl<'de> BsonDeserializer<'de> { | |
pub fn from_raw_doc(doc: RawBsonDoc<'de>) -> Self { | |
BsonDeserializer { doc, offset: 0 } | |
} | |
pub fn from_slice(input: &'de [u8]) -> Option<Self> { | |
let doc = RawBsonDoc::new(input)?; | |
BsonDeserializer::from_raw_doc(doc) | |
} | |
} | |
impl<'de, 'a> Deserializer<'de> for &'a mut BsonDeserializer<'de> { | |
type Error = Error; | |
fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> { | |
loop { | |
let bsontype = self.doc.data.get(self.offset).map(BsonType::new); | |
match bsontype { | |
None => break, | |
Some(BsonType::String) => | |
let bsontype = BsonType::from_byte(self.doc.data[self.offset]); | |
match bsontype { | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment