Skip to content

Instantly share code, notes, and snippets.

@Twister915
Last active April 21, 2022 07:30
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Twister915/365887ffff7519f3d8c428bfa046e2d7 to your computer and use it in GitHub Desktop.
Save Twister915/365887ffff7519f3d8c428bfa046e2d7 to your computer and use it in GitHub Desktop.
topk
use std::marker::PhantomData;
pub enum TopK<I, E, K, F, const N: usize> {
// this TopK has just been created and no data has been computed
// this is the initial state
Prepared {
itr: I,
f: F,
_k: PhantomData<K>,
},
// results have been demanded by a call to .next(), so we have computed the top K items
Ready {
items: std::array::IntoIter<Option<E>, N>,
size: usize,
}
}
impl<I, E, K, F, const N: usize> TopK<I, E, K, F, N>
where
I: Iterator<Item=E>,
K: PartialOrd<K>,
F: Fn(&E) -> K,
[Option<K>; N]: Default,
[Option<E>; N]: Default,
{
pub fn new(itr: I, f: F) -> Self {
Self::Prepared {
itr,
f,
_k: PhantomData,
}
}
fn items_iter(&mut self) -> &mut std::array::IntoIter<Option<E>, N> {
loop {
use TopK::*;
match self {
Ready { items, .. } => {
return items;
},
Prepared { itr, f, .. } => {
*self = Self::compute_ready_state(itr, f);
}
}
}
}
// using an iterator and a scoring function, this produces a Self::Ready
fn compute_ready_state(itr: &mut I, f: &mut F) -> Self {
// size (updated below). Should be N or less, depending on if source iterator emits <N items
let mut size = 0;
// these two arrays are coordinated such that if scores[x].is_some() then items[x].is_some()
// scores[x] is f(&items[x])
let mut scores: [Option<K>; N] = Default::default();
let mut items: [Option<E>; N] = Default::default();
// exhaust the iterator (look at every item)
while let Some(item) = itr.next() {
// compute score
let score = f(&item);
// find if the score is larger than anything in our array currently
for i in 0..N {
// we should insert if we are larger OR if the slot is available
if if let Some(other) = &scores[i] {
other < &score
} else {
true
} {
// insert score and item
array_insert(&mut scores, Some(score), i);
array_insert(&mut items, Some(item), i);
// ensure size is correct
if size < N {
size += 1;
}
// this break combined with the structure of this loop ensures that
// the arrays are always sorted from greatest -> least score value
break;
}
}
}
// convert items into an iterator, which is what the Self::Ready wants
let items = items.into_iter();
Self::Ready { items, size }
}
}
impl<I, E, K, F, const N: usize> Iterator for TopK<I, E, K, F, N>
where
I: Iterator<Item=E>,
K: PartialOrd<K>,
F: Fn(&E) -> K,
[Option<K>; N]: Default,
[Option<E>; N]: Default,
{
type Item = E;
fn next(&mut self) -> Option<Self::Item> {
self.items_iter().next().flatten()
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
TopK::Ready { size, .. } => (0, Some(*size)),
TopK::Prepared { .. } => (0, Some(N)),
}
}
}
pub trait TopKExt: Iterator + Sized {
fn top_k<K, F, const N: usize>(self, score_f: F) -> TopK<Self, Self::Item, K, F, N>
where
F: Fn(&Self::Item) -> K,
K: PartialOrd<K>,
[Option<K>; N]: Default,
[Option<Self::Item>; N]: Default,
{
TopK::new(self, score_f)
}
}
impl<I> TopKExt for I where I: Iterator + Sized {}
#[inline]
fn array_insert<E, const N: usize>(elems: &mut[E; N], mut tmp: E, idx: usize) {
for i in idx..N {
std::mem::swap(&mut tmp, &mut elems[i]);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment