Skip to content

Instantly share code, notes, and snippets.

@eira-fransham
Last active August 1, 2021 16:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eira-fransham/aa5b9f6ddc29448f1cafea156f719996 to your computer and use it in GitHub Desktop.
Save eira-fransham/aa5b9f6ddc29448f1cafea156f719996 to your computer and use it in GitHub Desktop.
use std::{
borrow::{Borrow, ToOwned},
marker::PhantomData,
ops::{Deref, Drop},
ptr::{self, NonNull},
};
pub unsafe trait Cursed<T: ?Sized>: Borrow<T> + Sized {
fn borrowed(borowed: &T) -> Option<NonNull<T>>;
fn owned(self) -> Option<NonNull<T>>;
fn is_owned(ptr: NonNull<T>) -> bool;
unsafe fn as_scoped_ref(ptr: &NonNull<T>) -> &T;
unsafe fn as_ref<'a>(ptr: NonNull<T>) -> &'a T;
unsafe fn reconstruct(ptr: NonNull<T>) -> Option<Self>;
}
// This ensures that this struct is always treated exactly the same as `NonNull<T>`
// at runtime.
#[repr(transparent)]
pub struct CursedCow<'a, T>
where
T: ?Sized + ToOwned,
T::Owned: Cursed<T>,
{
ptr: NonNull<T>,
_marker: PhantomData<&'a T>,
}
impl<T> Drop for CursedCow<'_, T>
where
T: ?Sized + ToOwned,
T::Owned: Cursed<T>,
{
fn drop(&mut self) {
if self.is_owned() {
let _ = unsafe { <T::Owned as Cursed<T>>::reconstruct(self.ptr) };
}
}
}
impl<'a, T> CursedCow<'a, T>
where
T: ?Sized + ToOwned,
T::Owned: Cursed<T>,
{
pub fn try_owned(owned: <T as ToOwned>::Owned) -> Option<Self> {
Some(CursedCow {
ptr: <T::Owned as Cursed<T>>::owned(owned)?,
_marker: PhantomData,
})
}
pub fn owned(owned: <T as ToOwned>::Owned) -> Self {
Self::try_owned(owned).unwrap()
}
pub fn try_borrowed(inner: &'a T) -> Option<Self> {
Some(CursedCow {
ptr: <T::Owned as Cursed<T>>::borrowed(inner)?,
_marker: PhantomData,
})
}
pub fn borrowed(inner: &'a T) -> Self {
Self::try_borrowed(inner).unwrap()
}
pub fn as_ref(&self) -> &'_ T {
unsafe { <T::Owned as Cursed<T>>::as_scoped_ref(&self.ptr) }
}
pub fn is_owned(&self) -> bool {
<T::Owned as Cursed<T>>::is_owned(self.ptr)
}
pub fn is_borrowed(&self) -> bool {
!self.is_owned()
}
pub fn into_owned(self) -> T::Owned {
if self.is_owned() {
let this = mem::ManuallyDrop::new(self);
unsafe { <T::Owned as Cursed<T>>::reconstruct(this.ptr) }
.unwrap_or_else(|| this.as_ref().to_owned())
} else {
self.as_ref().to_owned()
}
}
}
impl<T> Deref for CursedCow<'_, T>
where
T: ?Sized + ToOwned,
T::Owned: Cursed<T>,
{
type Target = T;
fn deref(&self) -> &T {
self.as_ref()
}
}
impl<'a, T> From<CursedCow<'a, T>> for std::borrow::Cow<'a, T>
where
T: ?Sized + ToOwned,
T::Owned: Cursed<T>,
{
fn from(other: CursedCow<'a, T>) -> Self {
if other.is_owned() {
std::borrow::Cow::Owned(other.into_owned())
} else {
unsafe { std::borrow::Cow::Borrowed(<T::Owned as Cursed<T>>::as_ref(other.ptr)) }
}
}
}
impl<'a, T> From<std::borrow::Cow<'a, T>> for CursedCow<'a, T>
where
T: ?Sized + ToOwned,
T::Owned: Cursed<T>,
{
fn from(other: std::borrow::Cow<'a, T>) -> Self {
match other {
std::borrow::Cow::Borrowed(b) => Self::borrowed(b),
std::borrow::Cow::Owned(o) => Self::owned(o),
}
}
}
use std::{mem, slice, str, usize};
// These calculate a mask for the two halves of the `usize`'s bits. For 64-bit this will
// be the bottom 32 bits, for 32-bit it will be the bottom 16 bits.
const CAP_SHIFT: usize = (mem::size_of::<usize>() * 8) / 2;
const LEN_MASK: usize = usize::MAX >> CAP_SHIFT;
const CAP_MASK: usize = !LEN_MASK;
#[inline(always)]
fn max_inlined_len<T>() -> usize {
// Inlining zero-sized values is useless - we might as well just ignore this optimisation
// for arrays of zero-sized values.
if mem::size_of::<T>() == 0 {
0
} else {
mem::size_of::<&[T]>().saturating_sub(mem::align_of::<T>()) / mem::size_of::<T>()
}
}
#[cfg(target_endian = "little")]
#[inline(always)]
fn split_len_ptr(val: usize) -> (u8, usize) {
(val as u8, val >> 8)
}
#[cfg(target_endian = "big")]
#[inline(always)]
fn split_len_ptr(val: usize) -> (u8, usize) {
(
val >> (mem::size_of::<usize>() - 8),
val & (std::usize::MAX >> 8),
)
}
#[cfg(target_endian = "little")]
#[inline(always)]
fn from_len_ptr(len: u8, ptr: usize) -> usize {
(len as usize) | ptr << 8
}
#[cfg(target_endian = "big")]
#[inline(always)]
fn from_len_ptr(len: u8, ptr: usize) -> usize {
(len as usize) << (mem::size_of::<usize>() - 8) | ptr & std::usize::MAX >> 8
}
unsafe impl<T> Cursed<[T]> for Vec<T> {
fn borrowed(ptr: &[T]) -> Option<NonNull<[T]>> {
let len_ptr = from_len_ptr(0, ptr.as_ptr() as *const u8 as usize);
if split_len_ptr(len_ptr).1 != ptr.as_ptr() as *const u8 as usize
|| ptr.len() & CAP_MASK != 0
{
None
} else {
unsafe {
Some(NonNull::from(slice::from_raw_parts(
mem::transmute::<usize, *const T>(len_ptr),
ptr.len() & LEN_MASK,
)))
}
}
}
fn owned(self) -> Option<NonNull<[T]>> {
use std::mem::MaybeUninit;
// Here's the new branch
if (1..=max_inlined_len::<T>()).contains(&self.len()) {
unsafe {
let this = mem::ManuallyDrop::new(self);
let mut out_ptr = MaybeUninit::<*mut [T]>::uninit();
// We know that `self.len` is smaller than `u8::max` and non-zero, so this
// will only write the first byte.
update_as_usize(&mut *out_ptr.as_mut_ptr(), |p| {
*p = from_len_ptr(this.len() as u8, 0);
});
let mut cursor = (out_ptr.as_mut_ptr() as *mut u8)
.offset(mem::align_of::<T>() as isize)
as *mut T;
for i in &**this {
cursor.write(ptr::read(i));
cursor = cursor.offset(1);
}
Some(NonNull::new_unchecked(out_ptr.assume_init()))
}
} else if split_len_ptr(self.as_ptr() as *mut u8 as usize).0 != 0
|| self.len() & CAP_MASK != 0
|| self.capacity() & CAP_MASK != 0
{
None
} else {
let mut this = mem::ManuallyDrop::new(self);
let len_ptr = from_len_ptr(0, this.as_mut_ptr() as *mut u8 as usize);
unsafe {
Some(NonNull::from(slice::from_raw_parts_mut(
mem::transmute::<usize, *mut T>(len_ptr),
this.len() | (this.capacity() << CAP_SHIFT),
)))
}
}
}
fn is_owned(ptr: NonNull<[T]>) -> bool {
let (len, _) = split_len_ptr(ptr.as_ptr() as *mut u8 as usize);
(max_inlined_len::<T>() != 0 && len != 0) || (unsafe { ptr.as_ref() }.len() & CAP_MASK != 0)
}
unsafe fn as_scoped_ref(ptr: &NonNull<[T]>) -> &[T] {
let (inlined_len, _) = split_len_ptr(ptr.as_ptr() as *mut u8 as usize);
if max_inlined_len::<T>() == 0 || inlined_len == 0 {
<Self as Cursed<[T]>>::as_ref(*ptr)
} else {
slice::from_raw_parts(
(ptr as *const NonNull<[T]> as *const u8).offset(mem::align_of::<T>() as isize)
as *const T,
inlined_len as usize,
)
}
}
unsafe fn as_ref<'a>(ptr: NonNull<[T]>) -> &'a [T] {
let (_, noninlined_ptr) = split_len_ptr(ptr.as_ptr() as *mut u8 as usize);
slice::from_raw_parts(
mem::transmute::<usize, *const T>(noninlined_ptr),
ptr.as_ref().len() & LEN_MASK,
)
}
unsafe fn reconstruct(ptr: NonNull<[T]>) -> Option<Self> {
let (inlined_len, noninlined_ptr) = split_len_ptr(ptr.as_ptr() as *mut u8 as usize);
if max_inlined_len::<T>() == 0 || inlined_len == 0 {
Some(Vec::from_raw_parts(
mem::transmute::<usize, *mut T>(noninlined_ptr),
ptr.as_ref().len() & LEN_MASK,
ptr.as_ref().len() >> CAP_SHIFT,
))
} else {
None
}
}
}
// Basically the same as for `[T]` but we need to add `str::from_utf8_unchecked`
// (which is a no-op at runtime).
unsafe impl Cursed<str> for String {
fn borrowed(ptr: &str) -> Option<NonNull<str>> {
<Vec<u8> as Cursed<[u8]>>::borrowed(ptr.as_bytes()).map(|b| unsafe { mem::transmute(b) })
}
fn owned(self) -> Option<NonNull<str>> {
<Vec<u8> as Cursed<[u8]>>::owned(self.into_bytes()).map(|b| unsafe { mem::transmute(b) })
}
fn is_owned(ptr: NonNull<str>) -> bool {
<Vec<u8> as Cursed<[u8]>>::is_owned(unsafe { mem::transmute(ptr) })
}
unsafe fn as_scoped_ref(ptr: &NonNull<str>) -> &str {
str::from_utf8_unchecked(<Vec<u8> as Cursed<[u8]>>::as_scoped_ref(mem::transmute(
ptr,
)))
}
unsafe fn as_ref<'a>(ptr: NonNull<str>) -> &'a str {
str::from_utf8_unchecked(<Vec<u8> as Cursed<[u8]>>::as_ref(mem::transmute(ptr)))
}
unsafe fn reconstruct(ptr: NonNull<str>) -> Option<Self> {
<Vec<u8> as Cursed<[u8]>>::reconstruct(mem::transmute(ptr))
.map(|v| String::from_utf8_unchecked(v))
}
}
fn update_as_usize<O, F: FnOnce(&mut usize) -> O, T: ?Sized>(ptr: &mut *mut T, func: F) -> O {
unsafe {
// Here's where we invoke the darkest of dark magic, explanation below.
let ptr_as_bytes = mem::transmute::<&mut *mut T, &mut usize>(ptr);
// Since this is dark magic, we make sure that we explode if our
// assumptions are wrong.
assert_eq!(*ptr_as_bytes, *ptr as *mut u8 as usize);
func(ptr_as_bytes)
}
}
const PTR_MASK: usize = usize::MAX >> 1;
const TAG_MASK: usize = !PTR_MASK;
unsafe impl<T: ?Sized> Cursed<T> for Box<T> {
fn borrowed(ptr: &T) -> Option<NonNull<T>> {
if Self::is_owned(NonNull::from(ptr)) {
None
} else {
Some(NonNull::from(ptr))
}
}
fn owned(self) -> Option<NonNull<T>> {
let mut this = mem::ManuallyDrop::new(self);
let original_ptr = &mut **this as *mut T;
let mut ptr = original_ptr;
update_as_usize(&mut ptr, |p| *p |= TAG_MASK);
unsafe {
if Self::is_owned(NonNull::new_unchecked(original_ptr)) {
None
} else {
Some(NonNull::new_unchecked(ptr))
}
}
}
fn is_owned(ptr: NonNull<T>) -> bool {
ptr.as_ptr() as *mut u8 as usize & TAG_MASK != 0
}
unsafe fn as_scoped_ref(ptr: &NonNull<T>) -> &T {
<Self as Cursed<T>>::as_ref(*ptr)
}
unsafe fn as_ref<'a>(ptr: NonNull<T>) -> &'a T {
let mut ptr = ptr.as_ptr();
update_as_usize(&mut ptr, |p| *p &= PTR_MASK);
&*ptr
}
unsafe fn reconstruct(ptr: NonNull<T>) -> Option<Self> {
let mut ptr = ptr.as_ptr();
update_as_usize(&mut ptr, |p| *p &= PTR_MASK);
Some(Box::from_raw(ptr))
}
}
#[cfg(test)]
mod tests {
use super::CursedCow;
#[test]
fn borrowed_str() {
let s = "Hello World";
let c = CursedCow::borrowed(s);
assert_eq!(s, &*c);
}
#[test]
fn owned_string() {
let s = String::from("Hello World");
let c: CursedCow<str> = CursedCow::owned(s.clone());
assert_eq!(s, &*c);
}
#[test]
fn into_owned() {
let hello = "Hello World";
let borrowed = CursedCow::borrowed(hello);
let owned: CursedCow<str> = CursedCow::owned(String::from(hello));
assert_eq!(borrowed.into_owned(), hello);
assert_eq!(owned.into_owned(), hello);
}
#[test]
fn borrowed_slice() {
let s: &[_] = &[1, 2, 42];
let c = CursedCow::borrowed(s);
assert_eq!(s, &*c);
}
#[test]
fn owned_slice() {
let s = vec![1, 2, 42];
let c: CursedCow<[_]> = CursedCow::owned(s.clone());
assert_eq!(s, &*c);
}
#[test]
fn into_owned_vec() {
let hello: &[u8] = b"Hello World";
let borrowed = CursedCow::borrowed(hello);
let owned: CursedCow<[u8]> = CursedCow::owned(hello.to_vec());
assert_eq!(borrowed.into_owned(), hello);
assert_eq!(owned.into_owned(), hello);
}
#[test]
fn from_std_cow() {
let std = std::borrow::Cow::Borrowed("Hello World");
let beef = CursedCow::from(std.clone());
assert_eq!(&*std, &*beef);
}
#[test]
fn make_many_trait_objects() {
use std::borrow::ToOwned;
trait Foo {
fn clone_boxed(&self) -> Box<dyn Foo>;
fn as_usize(&self) -> usize;
}
macro_rules! impl_foo {
($t:ty) => {
impl Foo for $t {
fn clone_boxed(&self) -> Box<dyn Foo> {
Box::new(*self)
}
fn as_usize(&self) -> usize {
*self as usize
}
}
};
}
impl_foo!(u8);
impl_foo!(u16);
impl_foo!(u32);
impl_foo!(u64);
impl Foo for () {
fn clone_boxed(&self) -> Box<dyn Foo> {
Box::new(())
}
fn as_usize(&self) -> usize {
0
}
}
impl ToOwned for dyn Foo {
type Owned = Box<Self>;
fn to_owned(&self) -> Self::Owned {
self.clone_boxed()
}
}
let v: Vec<CursedCow<dyn Foo>> = vec![
CursedCow::owned(Box::new(()) as _),
CursedCow::owned(Box::new(1u64) as _),
CursedCow::owned(Box::new(2u32) as _),
CursedCow::owned(Box::new(3u16) as _),
CursedCow::owned(Box::new(4u8) as _),
];
let v3: Vec<CursedCow<dyn Foo>> = vec![
CursedCow::borrowed(&() as _),
CursedCow::borrowed(&1u64 as _),
CursedCow::borrowed(&2u32 as _),
CursedCow::borrowed(&3u16 as _),
CursedCow::borrowed(&4u8 as _),
];
let v2: Vec<CursedCow<dyn Foo>> = v.iter().map(|e| CursedCow::borrowed(&**e)).collect();
for (i, c) in v
.iter()
.enumerate()
.chain(v2.iter().enumerate())
.chain(v3.iter().enumerate())
{
assert_eq!(i, c.as_usize());
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment