Skip to content

Instantly share code, notes, and snippets.

@tobz
Created November 1, 2021 17:19
Show Gist options
  • Save tobz/fec84b1ac850aeaef216608a8585c982 to your computer and use it in GitHub Desktop.
Save tobz/fec84b1ac850aeaef216608a8585c982 to your computer and use it in GitHub Desktop.
diff --git a/metrics/src/cow.rs b/metrics/src/cow.rs
index 968b5ef..f1627af 100644
--- a/metrics/src/cow.rs
+++ b/metrics/src/cow.rs
@@ -91,6 +91,10 @@ impl<'a> Cow<'a, str> {
marker: PhantomData,
}
}
+
+ pub const fn const_as_ref(&self) -> &str {
+ unsafe { const_str_ref_from_parts(self.ptr, self.meta) }
+ }
}
impl<'a> Cow<'a, [Cow<'static, str>]> {
@@ -449,12 +453,12 @@ pub struct Metadata(usize, usize);
impl Metadata {
#[inline]
- fn length(&self) -> usize {
+ const fn length(&self) -> usize {
self.0
}
#[inline]
- fn capacity(&self) -> usize {
+ const fn capacity(&self) -> usize {
self.1
}
@@ -523,3 +527,58 @@ impl Metadata {
Metadata(1 << 32)
}
}*/
+
+const unsafe fn const_str_ref_from_parts<'a>(ptr: NonNull<u8>, metadata: Metadata) -> &'a str {
+ // SAFETY:
+ //
+ // This function is just about the most cursed code I've ever written, that said...
+ //
+ // - We only call this method via `Cow<'a, str>`, so we know we're dealing with a string.
+ // - If we're in a const context, we can't possibly be dealing with a `String`, and even if
+ // support for that gets added to const, our metadata tracks the length of the string anyways.
+ // - On top of that, `String`'s own method for getting a pointer looks like this:
+ // `self as *const str as *const u8` so we should be safe, layout-wise.
+ // - Our string pointer is already aligned, as we didn't manually construct it nor do we modify
+ // it after storing it.
+ // - We're handing out an immutable reference bound to the lifetime of the CoW structure itself.
+ // - The caller ensures the lifetime is tied to the CoW structure itself.
+ //
+ // We copy the machinery used by `str::ptr` to build our fat pointer to the underlying string,
+ // where we have the pointer, and the "metadata", which for a string is simply the pointer to
+ // the bytes and then the number of bytes in the string, as a usize. After that, it's just
+ // normal casting to build our immutable reference.
+ //
+ // Final warning, from the stdlib itself, that I decided not to heed (but this code comment
+ // should only exist if we made it work):
+ //
+ // > Accessing the value from the `PtrRepr` union is safe since *const T and PtrComponents<T>
+ // > have the same memory layouts. Only std can make this guarantee.
+
+ #[repr(C)]
+ union StrPtrRepr<'b> {
+ shared_ref: &'b str,
+ components: StrPtrComponents,
+ }
+
+ #[repr(C)]
+ struct StrPtrComponents {
+ data_address: *const (),
+ metadata: usize,
+ }
+
+ impl Copy for StrPtrComponents {}
+ impl Clone for StrPtrComponents {
+ fn clone(&self) -> Self {
+ *self
+ }
+ }
+
+ let repr = StrPtrRepr {
+ components: StrPtrComponents {
+ data_address: ptr.as_ptr() as *const (),
+ metadata: metadata.length(),
+ },
+ };
+
+ repr.shared_ref
+}
diff --git a/metrics/src/handles.rs b/metrics/src/handles.rs
index 5d94086..0aa102a 100644
--- a/metrics/src/handles.rs
+++ b/metrics/src/handles.rs
@@ -158,11 +158,11 @@ impl Histogram {
impl CounterFn for AtomicU64 {
fn increment(&self, value: u64) {
- let _ = self.fetch_add(value, Ordering::Release);
+ let _ = self.fetch_add(value, Ordering::Relaxed);
}
fn absolute(&self, value: u64) {
- let _ = self.fetch_max(value, Ordering::AcqRel);
+ let _ = self.fetch_max(value, Ordering::Relaxed);
}
}
@@ -196,7 +196,7 @@ impl GaugeFn for AtomicU64 {
}
fn set(&self, value: f64) {
- let _ = self.swap(value.to_bits(), Ordering::AcqRel);
+ let _ = self.swap(value.to_bits(), Ordering::Relaxed);
}
}
diff --git a/metrics/src/hashing.rs b/metrics/src/hashing.rs
new file mode 100644
index 0000000..1f52c96
--- /dev/null
+++ b/metrics/src/hashing.rs
@@ -0,0 +1,87 @@
+use std::hash::Hasher;
+
+const FNV_OFFSET_BASIS_64: u64 = 0xcbf29ce484222325;
+const FNV_PRIME_64: u64 = 0x00000100000001B3;
+
+/// Key-specific hashing algorithm.
+///
+/// Currently uses FNV1a - <https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html>
+///
+/// For any use-case within a `metrics`-owned or adjacent crate, where hashing of a key is required,
+/// this is the hasher that will be used.
+pub type KeyHasher = Fnv1aConstHasher;
+
+pub struct Fnv1aConstHasher(u64);
+
+impl Fnv1aConstHasher {
+ pub const fn new() -> Self {
+ Self(FNV_OFFSET_BASIS_64)
+ }
+
+ pub const fn const_write(self, bytes: &[u8]) -> Fnv1aConstHasher {
+ let mut hash = self.0;
+
+ let mut i = 0;
+ let len = bytes.len();
+ while i < len {
+ hash ^= bytes[i] as u64;
+ hash = hash.wrapping_mul(FNV_PRIME_64);
+ i += 1;
+ }
+
+ Fnv1aConstHasher(hash)
+ }
+
+ pub const fn const_finish(&self) -> u64 {
+ self.0
+ }
+}
+
+impl Default for Fnv1aConstHasher {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl Hasher for Fnv1aConstHasher {
+ fn finish(&self) -> u64 {
+ self.const_finish()
+ }
+
+ fn write(&mut self, bytes: &[u8]) {
+ let mut hash = Fnv1aConstHasher(self.0);
+ hash = hash.const_write(bytes);
+ self.0 = hash.0;
+ }
+}
+
+#[derive(Copy, Clone)]
+pub struct Fnv1aHasher(u64);
+
+impl Fnv1aHasher {
+ pub const fn new() -> Self {
+ Self(FNV_OFFSET_BASIS_64)
+ }
+}
+
+impl Default for Fnv1aHasher {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl Hasher for Fnv1aHasher {
+ fn finish(&self) -> u64 {
+ self.0
+ }
+
+ fn write(&mut self, bytes: &[u8]) {
+ let mut i = 0;
+ let len = bytes.len();
+ while i < len {
+ self.0 ^= bytes[i] as u64;
+ self.0 = self.0.wrapping_mul(FNV_PRIME_64);
+ i += 1;
+ }
+ }
+}
diff --git a/metrics/src/key.rs b/metrics/src/key.rs
index f040df0..044cb86 100644
--- a/metrics/src/key.rs
+++ b/metrics/src/key.rs
@@ -1,34 +1,29 @@
-use crate::{cow::Cow, IntoLabels, KeyHasher, Label, SharedString};
+use crate::{cow::Cow, hashing::Fnv1aHasher, IntoLabels, KeyHasher, Label, SharedString};
use alloc::{string::String, vec::Vec};
use core::{fmt, hash::Hash, slice::Iter};
-use std::{
- cmp,
- hash::Hasher,
- sync::atomic::{AtomicBool, AtomicU64, Ordering},
-};
+use std::{cmp, hash::Hasher};
const NO_LABELS: [Label; 0] = [];
/// A metric identifier.
///
-/// # Safety
-/// Clippy will report any usage of `Key` as the key of a map/set as "mutable key type", meaning
-/// that it believes that there is interior mutability present which could lead to a key being
-/// hashed different over time. That behavior could lead to unexpected behavior, as standard
-/// maps/sets depend on keys having stable hashes over time, related to times when they must be
-/// recomputed as the internal storage is resized and items are moved around.
+/// # Hashing behavior / `Hash` implementation
///
-/// In this case, the `Hash` implementation of `Key` does _not_ depend on the fields which Clippy
-/// considers mutable (the atomics) and so it is actually safe against differing hashes being
-/// generated. We personally allow this Clippy lint in places where we store the key, such as
-/// helper types in the `metrics-util` crate, and you may need to do the same if you're using it in
-/// such a way as well.
-#[derive(Debug)]
+/// As part of the overall performance posture of `metrics`, we generate the hash for a `Key` at the
+/// time of construction, whether constructed in a constant fashion or not. Specific helper
+/// utilities will access this pre-generated hash via [`Key::get_hash`], but there are still times
+/// where the typical [Hash](std::hash::Hash) trait muyst be used, such as when storing a `Key` in a
+/// map or set. As keys may need to be rehashed when the underlying storage is resized, this
+/// presents an opportunity for drift between the pre-generated hash and the hash returned by `Key`
+/// when using the `Hash` trait.
+///
+/// Callers _MUST_ use the [`KeyHasher`](crate::KeyHasher) hasher for any maps or sets where `Key`
+/// is used as the key. This will ensure that hashes match if they need to be regenerated.
+#[derive(Clone, Debug)]
pub struct Key {
name: SharedString,
labels: Cow<'static, [Label]>,
- hashed: AtomicBool,
- hash: AtomicU64,
+ hash: u64,
}
impl Key {
@@ -60,12 +55,7 @@ impl Key {
where
N: Into<SharedString>,
{
- Self {
- name: name.into(),
- labels: Cow::<[Label]>::const_slice(labels),
- hashed: AtomicBool::new(false),
- hash: AtomicU64::new(0),
- }
+ Self::builder(name.into(), Cow::<[Label]>::const_slice(labels))
}
/// Creates a [`Key`] from a static name.
@@ -82,20 +72,16 @@ impl Key {
Self {
name: Cow::const_str(name),
labels: Cow::<[Label]>::const_slice(labels),
- hashed: AtomicBool::new(false),
- hash: AtomicU64::new(0),
+ hash: const_hasher(name, labels),
}
}
fn builder(name: SharedString, labels: Cow<'static, [Label]>) -> Self {
- let hash = generate_key_hash(&name, &labels);
+ let mut hasher = Fnv1aHasher::new();
+ non_const_hasher(&mut hasher, &name, &labels);
+ let hash = hasher.finish();
- Self {
- name,
- labels,
- hashed: AtomicBool::new(true),
- hash: AtomicU64::new(hash),
- }
+ Self { name, labels, hash }
}
/// Name of this key.
@@ -128,36 +114,35 @@ impl Key {
/// Gets the hash value for this key.
pub fn get_hash(&self) -> u64 {
- if self.hashed.load(Ordering::Acquire) {
- self.hash.load(Ordering::Acquire)
- } else {
- let hash = generate_key_hash(&self.name, &self.labels);
- self.hash.store(hash, Ordering::Release);
- self.hashed.store(true, Ordering::Release);
- hash
- }
+ self.hash
}
}
-fn generate_key_hash(name: &SharedString, labels: &Cow<'static, [Label]>) -> u64 {
- let mut hasher = KeyHasher::default();
- key_hasher_impl(&mut hasher, name, labels);
- hasher.finish()
+const fn const_hasher(name: &'static str, labels: &'static [Label]) -> u64 {
+ let mut hasher = KeyHasher::new();
+ hasher = const_hash_str(hasher, name);
+ let mut i = 0;
+ let n = labels.len();
+ while i < n {
+ hasher = const_hash_str(hasher, labels[i].0.const_as_ref());
+ hasher = const_hash_str(hasher, labels[i].1.const_as_ref());
+ i += 1;
+ }
+
+ hasher.const_finish()
}
-fn key_hasher_impl<H: Hasher>(state: &mut H, name: &SharedString, labels: &Cow<'static, [Label]>) {
- name.hash(state);
- labels.hash(state);
+const fn const_hash_str(mut hasher: KeyHasher, s: &str) -> KeyHasher {
+ let bytes = s.as_bytes();
+ hasher = hasher.const_write(&bytes.len().to_ne_bytes());
+ hasher.const_write(bytes)
}
-impl Clone for Key {
- fn clone(&self) -> Self {
- Self {
- name: self.name.clone(),
- labels: self.labels.clone(),
- hashed: AtomicBool::new(self.hashed.load(Ordering::Acquire)),
- hash: AtomicU64::new(self.hash.load(Ordering::Acquire)),
- }
+fn non_const_hasher<H: Hasher>(state: &mut H, name: &SharedString, labels: &Cow<'static, [Label]>) {
+ let name = name.as_ref().as_bytes();
+ name.hash(state);
+ for label in labels.as_ref() {
+ label.hash(state);
}
}
@@ -183,7 +168,7 @@ impl Ord for Key {
impl Hash for Key {
fn hash<H: Hasher>(&self, state: &mut H) {
- key_hasher_impl(state, &self.name, &self.labels);
+ non_const_hasher(state, &self.name, &self.labels);
}
}
@@ -232,8 +217,11 @@ where
#[cfg(test)]
mod tests {
use super::Key;
- use crate::Label;
- use std::collections::HashMap;
+ use crate::{KeyHasher, Label};
+ use std::{
+ collections::HashMap,
+ hash::{Hash, Hasher},
+ };
static BORROWED_NAME: &'static str = "name";
static FOOBAR_NAME: &'static str = "foobar";
@@ -329,4 +317,38 @@ mod tests {
"Key(foobar, [black = black, lives = lives, matter = matter])"
);
}
+
+ #[test]
+ fn test_static_vs_dynamic_hashing() {
+ let get_non_const_hash = |key: &Key| {
+ let mut hasher = KeyHasher::new();
+ key.hash(&mut hasher);
+ hasher.finish()
+ };
+
+ let from_name = Key::from_name(FOOBAR_NAME);
+ let from_static_name = Key::from_static_name(FOOBAR_NAME);
+ assert_eq!(from_name.get_hash(), from_static_name.get_hash());
+ assert_eq!(get_non_const_hash(&from_name), from_name.get_hash());
+ assert_eq!(
+ get_non_const_hash(&from_static_name),
+ from_static_name.get_hash()
+ );
+ assert_eq!(from_name.get_hash(), from_static_name.get_hash());
+
+ let from_parts = Key::from_parts(FOOBAR_NAME, LABELS.iter());
+ let from_static_labels = Key::from_static_labels(FOOBAR_NAME, &LABELS);
+ let from_static_parts = Key::from_static_parts(FOOBAR_NAME, &LABELS);
+ assert_eq!(get_non_const_hash(&from_parts), from_parts.get_hash());
+ assert_eq!(
+ get_non_const_hash(&from_static_labels),
+ from_static_labels.get_hash()
+ );
+ assert_eq!(
+ get_non_const_hash(&from_static_parts),
+ from_static_parts.get_hash()
+ );
+ assert_eq!(from_parts.get_hash(), from_static_labels.get_hash());
+ assert_eq!(from_static_labels.get_hash(), from_static_parts.get_hash());
+ }
}
diff --git a/metrics/src/label.rs b/metrics/src/label.rs
index bcffd68..444454b 100644
--- a/metrics/src/label.rs
+++ b/metrics/src/label.rs
@@ -1,4 +1,4 @@
-use std::slice::Iter;
+use std::{hash, slice::Iter};
use crate::SharedString;
use alloc::vec::Vec;
@@ -13,7 +13,7 @@ use alloc::vec::Vec;
/// the request currently being processed, or the request path being processed. Another example may
/// be that if you were running a piece of code that was turned on or off by a feature toggle, you may
/// wish to include a label in metrics to indicate whether or not they were using the feature toggle.
-#[derive(PartialEq, Eq, Hash, Clone, Debug, PartialOrd, Ord)]
+#[derive(PartialEq, Eq, Clone, Debug, PartialOrd, Ord)]
pub struct Label(pub(crate) SharedString, pub(crate) SharedString);
impl Label {
@@ -47,6 +47,13 @@ impl Label {
}
}
+impl hash::Hash for Label {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ self.0.as_ref().as_bytes().hash(state);
+ self.1.as_ref().as_bytes().hash(state);
+ }
+}
+
impl<K, V> From<&(K, V)> for Label
where
K: Into<SharedString> + Clone,
diff --git a/metrics/src/lib.rs b/metrics/src/lib.rs
index 903b758..1ed3b74 100644
--- a/metrics/src/lib.rs
+++ b/metrics/src/lib.rs
@@ -262,6 +262,9 @@ mod cow;
mod handles;
pub use self::handles::*;
+mod hashing;
+pub use self::hashing::KeyHasher;
+
mod key;
pub use self::key::*;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment