Skip to content

Instantly share code, notes, and snippets.

@jcdyer
Last active October 24, 2019 20:55
Show Gist options
  • Save jcdyer/621c47e1942b20cc8a827246d0ba7e29 to your computer and use it in GitHub Desktop.
Save jcdyer/621c47e1942b20cc8a827246d0ba7e29 to your computer and use it in GitHub Desktop.
rawbson prototype
use std::borrow::Cow;
use std::convert::TryInto;
/// Element type tags understood by this prototype.
///
/// Only the element types handled so far are modelled; every other tag byte
/// is rejected by [`BsonType::from_byte`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BsonType {
    /// String element (tag byte `2`).
    String,
    /// Embedded document element (tag byte `3`).
    Document,
    /// Array element (tag byte `4`).
    Array,
}

impl BsonType {
    /// Maps an element tag byte to its [`BsonType`].
    ///
    /// Returns `None` for any tag byte other than `2`, `3` or `4`.
    fn from_byte(byte: u8) -> Option<BsonType> {
        match byte {
            2 => Some(BsonType::String),
            3 => Some(BsonType::Document),
            4 => Some(BsonType::Array),
            _ => None,
        }
    }
}
/// A borrowed view over the bytes of one encoded BSON document.
///
/// The view only holds a slice reference, so it is cheap to copy. Values
/// handed out by `get` borrow from the same underlying buffer.
#[derive(Clone, Copy, Debug)]
pub struct RawBsonDoc<'a> {
    /// The complete encoded document: four-byte length prefix, elements and
    /// the trailing `0` byte, as checked by `new`.
    data: &'a [u8],
}
impl<'a> RawBsonDoc<'a> {
pub fn new(data: &'a [u8]) -> RawBsonDoc<'a> {
let length = i32_from_slice(&data[..4]);
assert_eq!(data.len() as i32, length); // Length is properly specified
assert_eq!(*data.iter().last().unwrap(), 0); // Document is null terminated
RawBsonDoc { data }
}
pub fn get(&self, key: &str) -> Option<RawBson<'a>> {
let mut offset = 4;
while offset < self.data.len() - 1 {
let thiskey = {
let mut splits = self.data[offset + 1..].splitn(2, |x| *x == 0);
splits.next()?
};
let bsontype = BsonType::from_byte(self.data[offset])?;
let valueoffset = offset + 1 + thiskey.len() + 1;
let newoffset = valueoffset + match &bsontype {
BsonType::String => {
4 + i32_from_slice(&self.data[valueoffset..valueoffset + 4])
}
BsonType::Document => {
i32_from_slice(&self.data[valueoffset..valueoffset + 4])
}
BsonType::Array => {
i32_from_slice(&self.data[valueoffset..valueoffset + 4])
}
} as usize;
if thiskey == key.as_bytes() {
return Some(RawBson::new(bsontype, &self.data[valueoffset..newoffset]));
}
offset = newoffset;
}
None
}
}
/// A borrowed view over an encoded BSON array.
///
/// The array is held as a `RawBsonDoc`; `get` looks elements up in that
/// document under the decimal string form of their index.
pub struct RawBsonArray<'a> {
/// The document view that backs this array.
doc: RawBsonDoc<'a>,
}
impl<'a> RawBsonArray<'a> {
    /// Wraps `data` as an array by handing it to `RawBsonDoc::new`.
    ///
    /// # Panics
    ///
    /// Panics whenever `RawBsonDoc::new` rejects `data`.
    pub fn new(data: &'a [u8]) -> RawBsonArray<'a> {
        RawBsonArray {
            doc: RawBsonDoc::new(data),
        }
    }

    /// Returns the element at position `index`, if the backing document has
    /// an entry under that index's decimal key.
    pub fn get(&self, index: usize) -> Option<RawBson<'a>> {
        self.doc.get(&key_from_index(index))
    }
}
/// A single raw BSON value: a type tag plus the bytes that encode the value.
///
/// Instances are produced by `RawBsonDoc::get` and interpreted on demand by
/// the `as_*` accessors.
pub struct RawBson<'a> {
/// Which kind of value `data` encodes.
bsontype: BsonType,
/// The encoded value bytes, borrowed from the enclosing document.
data: &'a [u8],
}
impl<'a> RawBson<'a> {
// This is not public. A RawBson object can only be created by the .get() method
// on RawBsonDoc (or RawBsonArray?)
//
fn new(bsontype: BsonType, data: &'a [u8]) -> RawBson<'a> {
RawBson { bsontype, data }
}
pub fn as_doc(&self) -> Option<RawBsonDoc<'a>> {
if let BsonType::Document = self.bsontype {
Some(RawBsonDoc::new(self.data))
} else {
None
}
}
pub fn as_array(&self) -> Option<RawBsonArray<'a>> {
if let BsonType::Array = self.bsontype {
Some(RawBsonArray::new(self.data))
} else {
None
}
}
pub fn as_str(&self) -> Option<&str> {
if let BsonType::String = self.bsontype {
let length = i32_from_slice(&self.data[..4]);
assert_eq!(self.data.len() as i32, length + 4);
Some(std::str::from_utf8(&self.data[4..4 + length as usize - 1]).ok()?)
} else {
None
}
}
}
/// Returns the decimal string form of `index`, used as an array element key.
///
/// Indices `0` through `15` are served from static literals; larger indices
/// are formatted into a newly allocated `String`.
fn key_from_index(index: usize) -> Cow<'static, str> {
    // Avoid allocation for short arrays by using str literals
    const SMALL_KEYS: [&str; 16] = [
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
    ];
    match SMALL_KEYS.get(index) {
        Some(&literal) => Cow::Borrowed(literal),
        None => Cow::Owned(index.to_string()),
    }
}
/// Decodes a little-endian `i32` from a slice of exactly four bytes.
///
/// # Panics
///
/// Panics if `val` is not exactly four bytes long.
fn i32_from_slice(val: &[u8]) -> i32 {
    let bytes: [u8; 4] = val.try_into().expect("i32 is four bytes");
    i32::from_le_bytes(bytes)
}
// Round-trip tests: each builds a document with the `bson` crate, encodes it
// to bytes, and reads values back through the raw accessors defined above.
#[cfg(test)]
mod tests {
use super::*;
use bson::{bson, doc, encode_document};
// Encodes `doc` into a fresh byte vector; panics if encoding fails.
fn to_bytes(doc: &::bson::Document) -> Vec<u8> {
let mut docbytes = Vec::new();
encode_document(&mut docbytes, doc).unwrap();
docbytes
}
// A string value can be fetched by key even when it is not the first element.
#[test]
fn string_from_document() {
let docbytes = to_bytes(&doc!{
"this": "first",
"that": "second",
"something": "else",
});
let rawdoc = RawBsonDoc::new(&docbytes);
assert_eq!(
rawdoc.get("that").unwrap().as_str().unwrap(),
"second",
);
}
// A string nested inside an embedded document is reachable via `as_doc`.
#[test]
fn nested_document() {
let docbytes = to_bytes(&doc!{
"outer": {
"inner": "surprise",
},
});
let rawdoc = RawBsonDoc::new(&docbytes);
assert_eq!(
rawdoc.get("outer").expect("get doc")
.as_doc().expect("as doc")
.get("inner").expect("get str")
.as_str().expect("as str"),
"surprise",
);
}
// An array element is reachable by numeric index via `as_array`.
#[test]
fn array() {
let docbytes = to_bytes(&doc!{
"array": [
"cats",
"dogs",
"rhinoceroses",
],
});
let rawdoc = RawBsonDoc::new(&docbytes);
assert_eq!(
rawdoc.get("array").expect("get array")
.as_array().expect("as array")
.get(2).expect("get str")
.as_str().expect("as str"),
"rhinoceroses",
);
}
}
use serde::Deserialize;
use serde::de::{
self, Deserializer, Visitor,
};
use super RawBsonDoc;
/// Error type reported by `BsonDeserializer`; currently a unit struct that
/// carries no detail.
///
/// NOTE(review): this is used as the `Deserializer::Error` associated type
/// below. serde requires that type to implement `serde::de::Error`, and no
/// such impl is visible here — confirm it exists or add one.
pub struct Error;
/// Deserializer state: a raw document plus a cursor into its bytes.
pub struct BsonDeserializer<'de> {
/// The document being deserialized.
doc: RawBsonDoc<'de>,
/// Byte index into `doc.data` that is read next; the constructors set it to 0.
offset: usize,
}
impl<'de> BsonDeserializer<'de> {
    /// Creates a deserializer over `doc` with the cursor at offset 0.
    ///
    /// NOTE(review): `RawBsonDoc::get` starts scanning elements at offset 4,
    /// after the length prefix — confirm that 0 is the intended start here.
    pub fn from_raw_doc(doc: RawBsonDoc<'de>) -> Self {
        BsonDeserializer { doc, offset: 0 }
    }

    /// Creates a deserializer directly from encoded document bytes.
    ///
    /// Returns `None` when `input` is not framed as a document: shorter than
    /// five bytes, not ending in a `0` byte, or carrying a little-endian
    /// `i32` length prefix that is negative or differs from `input.len()`.
    pub fn from_slice(input: &'de [u8]) -> Option<Self> {
        // `RawBsonDoc::new` returns the document directly and panics on bad
        // framing, so the same framing rules are checked here first to turn
        // malformed input into `None`.
        if input.len() < 5 || input.last() != Some(&0) {
            return None;
        }
        let declared = i32::from_le_bytes([input[0], input[1], input[2], input[3]]);
        // A non-negative i32 always fits in usize, so the cast is lossless.
        if declared < 0 || declared as usize != input.len() {
            return None;
        }
        Some(BsonDeserializer::from_raw_doc(RawBsonDoc::new(input)))
    }
}
impl<'de, 'a> Deserializer<'de> for &'a mut BsonDeserializer<'de> {
type Error = Error;
fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
loop {
let bsontype = self.doc.data.get(self.offset).map(BsonType::new);
match bsontype {
None => break,
Some(BsonType::String) =>
let bsontype = BsonType::from_byte(self.doc.data[self.offset]);
match bsontype {
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment