Skip to content

Instantly share code, notes, and snippets.

@CAD97
Created May 4, 2023 00:21
Show Gist options
  • Save CAD97/64be7bd7c935ea771232cdda4906663f to your computer and use it in GitHub Desktop.
Save CAD97/64be7bd7c935ea771232cdda4906663f to your computer and use it in GitHub Desktop.
Compact syntax tree draft experiment
//! sorbus
#![allow(unused)]
use std::marker::PhantomData;
use std::mem::transmute;
use std::num::NonZeroU16;
use std::ops::Index;
use std::ptr;
/// A tree element that is either an interior node or a leaf token.
///
/// The two type parameters let the same shape be reused for different
/// representations of the payload (for example raw headers or borrowed views).
///
/// `Debug`, `PartialEq` and `Eq` are derived so that values can be logged and
/// compared; each derive only applies when both payload types support it.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Entry<Node, Token> {
    /// An interior node payload.
    Node(Node),
    /// A leaf token payload.
    Token(Token),
}
/// A `u16`-sized type whose only value is `0`.
///
/// It is the type of `RawToken::child_count`, sitting in the same field
/// position that `RawNode` fills with a `NonZeroU16`.
#[repr(u16)]
#[derive(Copy, Clone, Eq, PartialEq)]
enum ZeroU16 {
/// The single variant; its discriminant is `0`.
Value = 0,
}
/*
union Raw {
raw: u64,
node: struct {
syntax_kind: u16,
child_count: u16 in 1..,
text_len: u32,
},
child: struct {
text_offset: u32,
entry_index: u32,
},
token: struct {
syntax_kind: u16,
child_count: const 0_u16,
text_len: u32,
text: Trailing<str>
},
patch: struct {
_: const u64::MAX,
ptr: Trailing<Arc<[Raw]>>,
},
}
*/
/// One slot of the flat buffer wrapped by `RawSyntax`.
///
/// Every field is 8 bytes wide, so the same slot can be viewed as any of the
/// fixed-size record shapes below. Which view is meaningful for a given slot is
/// not recorded in the slot itself.
#[repr(C)]
#[derive(Copy, Clone)]
union RawData {
/// The slot as a plain 64-bit integer.
raw: u64,
/// The slot as a node header (non-zero `child_count`).
node: RawNode,
/// The slot as a child record.
child: RawChild,
/// The slot as a token header (`child_count` fixed at zero).
token: RawToken,
/// The slot as a header whose `child_count` is an unconstrained `u16`;
/// read through `RawData::entry` to tell nodes and tokens apart.
entry: RawEntry,
}
/// Tagged form of the records that `RawData::pack` can store in a slot.
#[derive(Copy, Clone)]
enum Data {
/// A node header.
Node(RawNode),
/// A child record.
Child(RawChild),
/// A token header.
Token(RawToken),
}
/// Header record of a node.
///
/// Shares its field order and widths with `RawToken` and `RawEntry`; the
/// non-zero `child_count` is what marks the header as a node.
#[repr(C)]
#[derive(Copy, Clone)]
struct RawNode {
/// Kind tag of the node (presumably a language-specific syntax kind — confirm).
syntax_kind: u16,
/// Number of children; never zero for a node.
child_count: NonZeroU16,
/// Text length of the node (presumably the total length of the text it covers — confirm).
text_len: u32,
}
/// Child record: the element type of `Node::children`.
#[repr(C)]
#[derive(Copy, Clone)]
struct RawChild {
/// Text offset of the child (presumably relative to its parent — TODO confirm).
text_offset: u32,
/// Index of the child's entry (presumably a slot index as accepted by
/// `RawSyntax::entry` — TODO confirm).
entry_index: u32,
}
/// Header record of a token.
///
/// Shares its field order and widths with `RawNode` and `RawEntry`; the
/// `child_count` field can only hold zero, which marks the header as a token.
#[repr(C)]
#[derive(Copy, Clone)]
struct RawToken {
/// Kind tag of the token (presumably a language-specific syntax kind — confirm).
syntax_kind: u16,
/// Always zero.
child_count: ZeroU16,
/// Length of the token text; `RawSyntax::entry` uses it as the byte length
/// of the `str` that trails the header in `Token`.
text_len: u32,
}
/// Header record with an unconstrained `child_count`.
///
/// Has the same field order and widths as `RawNode` and `RawToken`, so a
/// header can be read through this type first and then classified: a zero
/// `child_count` means token, anything else means node.
#[repr(C)]
#[derive(Copy, Clone)]
struct RawEntry {
/// Kind tag, copied verbatim into the node or token header.
syntax_kind: u16,
/// Zero for a token, non-zero for a node.
child_count: u16,
/// Text length, copied verbatim into the node or token header.
text_len: u32,
}
impl RawData {
#[inline]
fn pack(data: Data) -> Self {
match data {
Data::Node(node) => Self { node },
Data::Child(child) => Self { child },
Data::Token(token) => Self { token },
}
}
#[inline]
fn entry(self) -> RawEntry {
unsafe { self.entry }
}
#[inline]
fn child(self) -> RawChild {
unsafe { self.child }
}
}
impl RawEntry {
#[inline]
fn unpack(self) -> Entry<RawNode, RawToken> {
if self.is_node() {
Entry::Node(self.as_node().unwrap())
} else {
Entry::Token(self.as_token().unwrap())
}
}
#[inline]
fn is_node(self) -> bool {
self.child_count != 0
}
#[inline]
fn is_token(self) -> bool {
self.child_count == 0
}
#[inline]
fn as_node(self) -> Option<RawNode> {
Some(RawNode {
syntax_kind: self.syntax_kind,
child_count: NonZeroU16::new(self.child_count)?,
text_len: self.text_len,
})
}
#[inline]
fn as_token(self) -> Option<RawToken> {
if self.is_token() {
Some(RawToken {
syntax_kind: self.syntax_kind,
child_count: ZeroU16::Value,
text_len: self.text_len,
})
} else {
None
}
}
}
/// Unsized view of a node: its header immediately followed by its child records.
///
/// `RawSyntax::entry` produces references to this type by casting a slice
/// pointer into the slot buffer, so the `repr(C)` field order matters.
#[repr(C)]
struct Node {
/// The node header.
info: RawNode,
/// The child records trailing the header.
children: [RawChild],
}
/// Unsized view of a token: its header immediately followed by its text.
///
/// `RawSyntax::entry` produces references to this type by casting a slice
/// pointer into the slot buffer, so the `repr(C)` field order matters.
#[repr(C)]
struct Token {
/// The token header.
info: RawToken,
/// The token text trailing the header.
text: str,
}
/// A syntax tree stored as one flat slice of 8-byte `RawData` slots.
///
/// `repr(transparent)` gives it the same layout as the underlying slice.
#[repr(transparent)]
struct RawSyntax([RawData]);
impl RawSyntax {
fn entry(&self, ix: u32) -> Option<Entry<&Node, &Token>> {
let ix = ix as usize;
let entry = self.0.get(ix)?.entry();
if entry.is_node() {
let len = entry.child_count as usize;
if ix + len <= self.0.len() {
let ptr = ptr::slice_from_raw_parts(self.0.as_ptr(), len);
Some(Entry::Node(unsafe { &*(ptr as *const Node) }))
} else {
None
}
} else {
let len = entry.text_len as usize;
if ix + len % 4 <= self.0.len() {
let ptr = ptr::slice_from_raw_parts(self.0.as_ptr(), len);
Some(Entry::Token(unsafe { &*(ptr as *const Token) }))
} else {
None
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment