Created
September 14, 2019 21:51
-
-
Save thomcc/570b1af7f80a8b660918007de1d22739 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[derive(Default, Debug, Clone)] | |
pub struct RadixSorter { | |
// TODO: only need one buffer. | |
unsorted_buf: Vec<(usize, u32)>, | |
pub sorted_buf: Vec<(usize, u32)>, | |
} | |
impl RadixSorter { | |
#[inline] | |
pub fn new() -> Self { | |
Default::default() | |
} | |
#[inline] | |
pub fn with_capacity(v: usize) -> Self { | |
Self { | |
unsorted_buf: Vec::with_capacity(v), | |
sorted_buf: Vec::with_capacity(v), | |
} | |
} | |
#[inline] | |
pub fn clear(&mut self) { | |
self.unsorted_buf.clear(); | |
self.sorted_buf.clear(); | |
} | |
#[inline] | |
pub fn into_results(self) -> Vec<(usize, u32)> { | |
self.sorted_buf | |
} | |
#[inline] | |
#[must_use] | |
pub fn sort<T: ToRadixKey>(&mut self, v: &[T]) -> &[(usize, u32)] { | |
self.sort_by(v, ToRadixKey::to_radix_key) | |
} | |
#[inline] | |
#[must_use] | |
pub fn sort_by<T, F>(&mut self, v: &[T], mut cb: F) -> &[(usize, u32)] | |
where | |
F: FnMut(&T) -> u32, | |
{ | |
self.unsorted_buf.clear(); | |
self.unsorted_buf.extend(v.iter().enumerate().map(|(i, t)| (i, cb(t)))); | |
// self.sorted_buf.clear(); | |
self.sorted_buf.resize(v.len(), (0, 0)); | |
RadixSorter::do_sort(&mut self.unsorted_buf, &mut self.sorted_buf); | |
&self.sorted_buf | |
} | |
const HIST: usize = 2048; | |
#[inline(always)] | |
fn k0(k: u32) -> usize { | |
(k & 0x7ff) as usize | |
} | |
#[inline(always)] | |
fn k1(k: u32) -> usize { | |
((k >> 11) & 0x7ff) as usize | |
} | |
#[inline(always)] | |
fn k2(k: u32) -> usize { | |
(k >> 22) as usize | |
} | |
fn build_histograms( | |
input: &[(usize, u32)], | |
h0: &mut [u32; Self::HIST], | |
h1: &mut [u32; Self::HIST], | |
h2: &mut [u32; Self::HIST], | |
) { | |
for (_, k) in input { | |
let k = *k; | |
let k0 = Self::k0(k); | |
h0[k0] = h0[k0].wrapping_add(1); | |
let k1 = Self::k1(k); | |
h1[k1] = h1[k1].wrapping_add(1); | |
let k2 = Self::k2(k); | |
h2[k2] = h2[k2].wrapping_add(1); | |
} | |
} | |
fn sum_histograms(h0: &mut [u32; Self::HIST], h1: &mut [u32; Self::HIST], h2: &mut [u32; Self::HIST]) { | |
let mut sum0: u32 = 0; | |
let mut sum1: u32 = 0; | |
let mut sum2: u32 = 0; | |
for i in 0..Self::HIST { | |
let tally = h0[i].wrapping_add(sum0); | |
h0[i] = sum0.wrapping_sub(1); | |
sum0 = tally; | |
let tally = h1[i].wrapping_add(sum1); | |
h1[i] = sum1.wrapping_sub(1); | |
sum1 = tally; | |
let tally = h2[i].wrapping_add(sum2); | |
h2[i] = sum2.wrapping_sub(1); | |
sum2 = tally; | |
} | |
} | |
fn histosort( | |
unsorted: &mut [(usize, u32)], | |
sorted: &mut [(usize, u32)], | |
h0: &mut [u32; Self::HIST], | |
h1: &mut [u32; Self::HIST], | |
h2: &mut [u32; Self::HIST], | |
) { | |
// Note: unsorted into sorted | |
for &(idx, key) in unsorted.iter() { | |
let pos = &mut h0[Self::k0(key)]; | |
*pos = pos.wrapping_add(1); | |
sorted[*pos as usize] = (idx, key); | |
} | |
// Note: sorted into unsorted | |
for &(idx, key) in sorted.iter() { | |
let pos = &mut h1[Self::k1(key)]; | |
*pos = pos.wrapping_add(1); | |
unsorted[*pos as usize] = (idx, key); | |
} | |
// Note: unsorted into sorted -- ending in sorted as we want. | |
for &(idx, key) in unsorted.iter() { | |
let pos = &mut h2[Self::k2(key)]; | |
*pos = pos.wrapping_add(1); | |
sorted[*pos as usize] = (idx, key); | |
} | |
} | |
fn do_sort(unsorted: &mut [(usize, u32)], sorted: &mut [(usize, u32)]) { | |
// we control the input to this fn, so debug_assert is fine. | |
debug_assert!(unsorted.len() == sorted.len()); | |
let mut h0 = [0u32; Self::HIST]; | |
let mut h1 = [0u32; Self::HIST]; | |
let mut h2 = [0u32; Self::HIST]; | |
Self::build_histograms(unsorted, &mut h0, &mut h1, &mut h2); | |
Self::sum_histograms(&mut h0, &mut h1, &mut h2); | |
Self::histosort(unsorted, sorted, &mut h0, &mut h1, &mut h2); | |
} | |
pub fn sort_unkey<T: ToRadixKey + Clone>(&mut self, v: &[T]) -> Vec<T> { | |
self.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(i, _)| v[*i].clone()).collect() | |
} | |
pub fn sort_from_key<T: ToRadixKey + FromRadixKey>(&mut self, v: &[T]) -> Vec<T> { | |
self.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(_, v)| T::from_radix_key(*v)).collect() | |
} | |
pub fn sort_in_place<T: ToRadixKey + FromRadixKey>(&mut self, v: &mut [T]) { | |
self.sort_by(v, ToRadixKey::to_radix_key).iter().enumerate().for_each(|(j, (_i, key))| { | |
v[j] = T::from_radix_key(*key); | |
}); | |
} | |
} | |
// TODO these should return iterators instead! | |
pub fn radix_sort_by<T, F>(v: &[T], cb: F) -> Vec<(usize, &T)> | |
where | |
F: FnMut(&T) -> u32, | |
{ | |
let mut sorter = RadixSorter::with_capacity(v.len()); | |
sorter.sort_by(v, cb).iter().map(|&(i, _)| (i, &v[i])).collect() | |
} | |
pub fn radix_sort<T: ToRadixKey>(v: &[T]) -> Vec<(usize, &T)> { | |
let mut sorter = RadixSorter::with_capacity(v.len()); | |
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().map(|&(i, _)| (i, &v[i])).collect() | |
} | |
pub fn radix_sort_unkey<T: ToRadixKey + Clone>(v: &[T]) -> Vec<T> { | |
let mut sorter = RadixSorter::with_capacity(v.len()); | |
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(i, _)| v[*i].clone()).collect() | |
} | |
pub fn radix_sort_from_key<T: ToRadixKey + FromRadixKey>(v: &[T]) -> Vec<T> { | |
let mut sorter = RadixSorter::with_capacity(v.len()); | |
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(_, v)| T::from_radix_key(*v)).collect() | |
} | |
pub fn radix_sort_from_key_in_place<T: ToRadixKey + FromRadixKey>(v: &mut [T]) { | |
let mut sorter = RadixSorter::with_capacity(v.len()); | |
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().enumerate().for_each(|(j, (_i, key))| { | |
v[j] = T::from_radix_key(*key); | |
}); | |
} | |
// pub fn radix_sort_in_place<T: ToRadixKey+Clone>(v: &mut [T]) { | |
// let mut sorter = RadixSorter::with_capacity(v.len()); | |
// sorter.sort_in_place(v) | |
// } | |
pub trait ToRadixKey { | |
fn to_radix_key(&self) -> u32; | |
} | |
pub trait FromRadixKey: Copy { | |
fn from_radix_key(k: u32) -> Self; | |
} | |
impl ToRadixKey for f32 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
let f = self.to_bits(); | |
let m0 = (f >> 31) as i32; | |
let mn = -m0; | |
let mask = (mn as u32) | 0x8000_0000u32; | |
f ^ mask | |
} | |
} | |
impl FromRadixKey for f32 { | |
#[inline] | |
fn from_radix_key(f: u32) -> Self { | |
let mask = (f >> 31).wrapping_sub(1) | 0x80000000; | |
f32::from_bits(f ^ mask) | |
} | |
} | |
impl ToRadixKey for u32 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
*self | |
} | |
} | |
impl FromRadixKey for u32 { | |
#[inline] | |
fn from_radix_key(u: u32) -> Self { u } | |
} | |
impl ToRadixKey for u16 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
*self as u32 | |
} | |
} | |
impl FromRadixKey for u16 { | |
#[inline] | |
fn from_radix_key(u: u32) -> Self { u as u16 } | |
} | |
impl ToRadixKey for u8 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
*self as u32 | |
} | |
} | |
impl FromRadixKey for u8 { | |
#[inline] | |
fn from_radix_key(u: u32) -> Self { u as u8 } | |
} | |
impl ToRadixKey for i32 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
(*self ^ i32::min_value()) as u32 | |
} | |
} | |
impl FromRadixKey for i32 { | |
#[inline] | |
fn from_radix_key(u: u32) -> Self { ((u as i32) ^ i32::min_value()) } | |
} | |
impl ToRadixKey for i16 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
(*self as i32).to_radix_key() | |
} | |
} | |
impl FromRadixKey for i16 { | |
#[inline] | |
fn from_radix_key(u: u32) -> Self { i32::from_radix_key(u) as i16 } | |
} | |
impl ToRadixKey for i8 { | |
#[inline] | |
fn to_radix_key(&self) -> u32 { | |
(*self as i32).to_radix_key() | |
} | |
} | |
impl FromRadixKey for i8 { | |
#[inline] | |
fn from_radix_key(u: u32) -> Self { i32::from_radix_key(u) as i8 } | |
} | |
// } | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
use rand::distributions::Standard; | |
use rand::prelude::*; | |
fn is_ascending<T: PartialOrd>(s: &[T]) -> bool { | |
if s.len() < 2 { | |
return true; | |
} | |
for i in 1..s.len() { | |
if s[i - 1] > s[i] { | |
return false; | |
} | |
} | |
true | |
} | |
#[test] | |
fn test_sort_lots() { | |
let mut r = thread_rng(); | |
for &size in &[0, 1, 2, 3, 4, 5, 10, 100] { | |
for _ in 0..50 { | |
let buf: Vec<u32> = (0..size).map(|_| r.gen()).collect(); | |
let mut seen = vec![false; buf.len()]; | |
let mut sorted = vec![]; | |
let mut sorter = RadixSorter::new(); | |
for &(i, key) in sorter.sort(&buf) { | |
assert_eq!(key, buf[i]); | |
assert!(!seen[i]); | |
seen[i] = true; | |
sorted.push(buf[i]); | |
} | |
assert!(is_ascending(&sorted), "{:?}", sorted); | |
assert!(seen.iter().all(|&f| f)); | |
} | |
} | |
} | |
fn test_sort_generic<T>() | |
where | |
T: ToRadixKey + PartialEq + PartialOrd + Clone + std::fmt::Debug, | |
Standard: Distribution<T>, | |
{ | |
let mut r = thread_rng(); | |
for _ in 0..10 { | |
let buf: Vec<T> = (0..500).map(|_| r.gen()).collect(); | |
let mut sorted = vec![]; | |
let mut seen = vec![false; buf.len()]; | |
let mut sorter = RadixSorter::new(); | |
for &(i, key) in sorter.sort(&buf) { | |
sorted.push(buf[i].clone()); | |
assert!(!seen[i]); | |
seen[i] = true; | |
assert_eq!(<T as ToRadixKey>::to_radix_key(&buf[i]), key); | |
} | |
assert!(is_ascending(&sorted), "{:?}", sorted); | |
assert!(seen.iter().all(|&f| f)); | |
} | |
} | |
fn test_sort_generic_dist<T, D>(d: D) | |
where | |
T: ToRadixKey + PartialEq + PartialOrd + Clone + std::fmt::Debug, | |
D: Distribution<T> + Copy, | |
{ | |
let mut r = thread_rng(); | |
for _ in 0..50 { | |
let buf: Vec<T> = (0..100).map(|_| r.sample(d)).collect(); | |
let mut sorted = vec![]; | |
let mut seen = vec![false; buf.len()]; | |
let mut sorter = RadixSorter::new(); | |
for &(i, key) in sorter.sort(&buf) { | |
sorted.push(buf[i].clone()); | |
assert!(!seen[i]); | |
seen[i] = true; | |
assert_eq!(<T as ToRadixKey>::to_radix_key(&buf[i]), key); | |
} | |
assert!(is_ascending(&sorted), "{:?}", sorted); | |
assert!(seen.iter().all(|&f| f)); | |
} | |
} | |
#[test] | |
fn test_sort_f32() { | |
test_sort_generic::<f32>(); | |
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(-1.0, 1.0)); | |
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(-100.0, -50.0)); | |
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(50.0, 100.0)); | |
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(1.0e5, 1.0e7)); | |
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(-1.0e7, -1.0e5)); | |
} | |
#[test] | |
fn test_sort_u32() { | |
test_sort_generic::<u32>(); | |
} | |
#[test] | |
fn test_sort_i32() { | |
test_sort_generic::<i32>(); | |
} | |
#[test] | |
fn test_sort_u16() { | |
test_sort_generic::<u16>(); | |
} | |
#[test] | |
fn test_sort_i16() { | |
test_sort_generic::<i16>(); | |
} | |
#[test] | |
fn test_sort_u8() { | |
test_sort_generic::<u8>(); | |
} | |
#[test] | |
fn test_sort_i8() { | |
test_sort_generic::<i8>(); | |
} | |
#[test] | |
fn test_sort_fulldupe() { | |
let buf = vec![40u32; 16000]; | |
let mut seen = vec![false; buf.len()]; | |
let mut sorter = RadixSorter::new(); | |
for &(i, key) in sorter.sort(&buf) { | |
assert!(!seen[i]); | |
seen[i] = true; | |
assert!(key == 40u32); | |
} | |
assert!(seen.iter().all(|&f| f)); | |
} | |
#[test] | |
fn test_sort_manydupe() { | |
let buf: Vec<u32> = (0u32..).take(9000).map(|i| (i % 3) + 30).collect(); | |
let mut seen = vec![false; buf.len()]; | |
let mut counts = [0usize; 3]; | |
let mut sorted = vec![]; | |
let mut sorter = RadixSorter::new(); | |
for &(i, key) in sorter.sort(&buf) { | |
assert!(!seen[i]); | |
seen[i] = true; | |
assert!(key == buf[i]); | |
sorted.push(buf[i]); | |
counts[(buf[i] - 30) as usize] += 1; | |
} | |
assert!(is_ascending(&sorted), "{:?}", sorted); | |
assert!(seen.iter().all(|&f| f)); | |
for i in buf { | |
counts[(i - 30) as usize] = counts[(i - 30) as usize].checked_sub(1).unwrap(); | |
} | |
assert_eq!(counts, [0, 0, 0]); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment