Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@thomcc
Created September 14, 2019 21:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomcc/570b1af7f80a8b660918007de1d22739 to your computer and use it in GitHub Desktop.
Save thomcc/570b1af7f80a8b660918007de1d22739 to your computer and use it in GitHub Desktop.
#[derive(Default, Debug, Clone)]
pub struct RadixSorter {
// TODO: only need one buffer.
unsorted_buf: Vec<(usize, u32)>,
pub sorted_buf: Vec<(usize, u32)>,
}
impl RadixSorter {
#[inline]
pub fn new() -> Self {
Default::default()
}
#[inline]
pub fn with_capacity(v: usize) -> Self {
Self {
unsorted_buf: Vec::with_capacity(v),
sorted_buf: Vec::with_capacity(v),
}
}
#[inline]
pub fn clear(&mut self) {
self.unsorted_buf.clear();
self.sorted_buf.clear();
}
#[inline]
pub fn into_results(self) -> Vec<(usize, u32)> {
self.sorted_buf
}
#[inline]
#[must_use]
pub fn sort<T: ToRadixKey>(&mut self, v: &[T]) -> &[(usize, u32)] {
self.sort_by(v, ToRadixKey::to_radix_key)
}
#[inline]
#[must_use]
pub fn sort_by<T, F>(&mut self, v: &[T], mut cb: F) -> &[(usize, u32)]
where
F: FnMut(&T) -> u32,
{
self.unsorted_buf.clear();
self.unsorted_buf.extend(v.iter().enumerate().map(|(i, t)| (i, cb(t))));
// self.sorted_buf.clear();
self.sorted_buf.resize(v.len(), (0, 0));
RadixSorter::do_sort(&mut self.unsorted_buf, &mut self.sorted_buf);
&self.sorted_buf
}
const HIST: usize = 2048;
#[inline(always)]
fn k0(k: u32) -> usize {
(k & 0x7ff) as usize
}
#[inline(always)]
fn k1(k: u32) -> usize {
((k >> 11) & 0x7ff) as usize
}
#[inline(always)]
fn k2(k: u32) -> usize {
(k >> 22) as usize
}
fn build_histograms(
input: &[(usize, u32)],
h0: &mut [u32; Self::HIST],
h1: &mut [u32; Self::HIST],
h2: &mut [u32; Self::HIST],
) {
for (_, k) in input {
let k = *k;
let k0 = Self::k0(k);
h0[k0] = h0[k0].wrapping_add(1);
let k1 = Self::k1(k);
h1[k1] = h1[k1].wrapping_add(1);
let k2 = Self::k2(k);
h2[k2] = h2[k2].wrapping_add(1);
}
}
fn sum_histograms(h0: &mut [u32; Self::HIST], h1: &mut [u32; Self::HIST], h2: &mut [u32; Self::HIST]) {
let mut sum0: u32 = 0;
let mut sum1: u32 = 0;
let mut sum2: u32 = 0;
for i in 0..Self::HIST {
let tally = h0[i].wrapping_add(sum0);
h0[i] = sum0.wrapping_sub(1);
sum0 = tally;
let tally = h1[i].wrapping_add(sum1);
h1[i] = sum1.wrapping_sub(1);
sum1 = tally;
let tally = h2[i].wrapping_add(sum2);
h2[i] = sum2.wrapping_sub(1);
sum2 = tally;
}
}
fn histosort(
unsorted: &mut [(usize, u32)],
sorted: &mut [(usize, u32)],
h0: &mut [u32; Self::HIST],
h1: &mut [u32; Self::HIST],
h2: &mut [u32; Self::HIST],
) {
// Note: unsorted into sorted
for &(idx, key) in unsorted.iter() {
let pos = &mut h0[Self::k0(key)];
*pos = pos.wrapping_add(1);
sorted[*pos as usize] = (idx, key);
}
// Note: sorted into unsorted
for &(idx, key) in sorted.iter() {
let pos = &mut h1[Self::k1(key)];
*pos = pos.wrapping_add(1);
unsorted[*pos as usize] = (idx, key);
}
// Note: unsorted into sorted -- ending in sorted as we want.
for &(idx, key) in unsorted.iter() {
let pos = &mut h2[Self::k2(key)];
*pos = pos.wrapping_add(1);
sorted[*pos as usize] = (idx, key);
}
}
fn do_sort(unsorted: &mut [(usize, u32)], sorted: &mut [(usize, u32)]) {
// we control the input to this fn, so debug_assert is fine.
debug_assert!(unsorted.len() == sorted.len());
let mut h0 = [0u32; Self::HIST];
let mut h1 = [0u32; Self::HIST];
let mut h2 = [0u32; Self::HIST];
Self::build_histograms(unsorted, &mut h0, &mut h1, &mut h2);
Self::sum_histograms(&mut h0, &mut h1, &mut h2);
Self::histosort(unsorted, sorted, &mut h0, &mut h1, &mut h2);
}
pub fn sort_unkey<T: ToRadixKey + Clone>(&mut self, v: &[T]) -> Vec<T> {
self.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(i, _)| v[*i].clone()).collect()
}
pub fn sort_from_key<T: ToRadixKey + FromRadixKey>(&mut self, v: &[T]) -> Vec<T> {
self.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(_, v)| T::from_radix_key(*v)).collect()
}
pub fn sort_in_place<T: ToRadixKey + FromRadixKey>(&mut self, v: &mut [T]) {
self.sort_by(v, ToRadixKey::to_radix_key).iter().enumerate().for_each(|(j, (_i, key))| {
v[j] = T::from_radix_key(*key);
});
}
}
// TODO these should return iterators instead!
pub fn radix_sort_by<T, F>(v: &[T], cb: F) -> Vec<(usize, &T)>
where
F: FnMut(&T) -> u32,
{
let mut sorter = RadixSorter::with_capacity(v.len());
sorter.sort_by(v, cb).iter().map(|&(i, _)| (i, &v[i])).collect()
}
pub fn radix_sort<T: ToRadixKey>(v: &[T]) -> Vec<(usize, &T)> {
let mut sorter = RadixSorter::with_capacity(v.len());
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().map(|&(i, _)| (i, &v[i])).collect()
}
pub fn radix_sort_unkey<T: ToRadixKey + Clone>(v: &[T]) -> Vec<T> {
let mut sorter = RadixSorter::with_capacity(v.len());
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(i, _)| v[*i].clone()).collect()
}
pub fn radix_sort_from_key<T: ToRadixKey + FromRadixKey>(v: &[T]) -> Vec<T> {
let mut sorter = RadixSorter::with_capacity(v.len());
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().map(|(_, v)| T::from_radix_key(*v)).collect()
}
pub fn radix_sort_from_key_in_place<T: ToRadixKey + FromRadixKey>(v: &mut [T]) {
let mut sorter = RadixSorter::with_capacity(v.len());
sorter.sort_by(v, ToRadixKey::to_radix_key).iter().enumerate().for_each(|(j, (_i, key))| {
v[j] = T::from_radix_key(*key);
});
}
// pub fn radix_sort_in_place<T: ToRadixKey+Clone>(v: &mut [T]) {
// let mut sorter = RadixSorter::with_capacity(v.len());
// sorter.sort_in_place(v)
// }
pub trait ToRadixKey {
fn to_radix_key(&self) -> u32;
}
pub trait FromRadixKey: Copy {
fn from_radix_key(k: u32) -> Self;
}
impl ToRadixKey for f32 {
#[inline]
fn to_radix_key(&self) -> u32 {
let f = self.to_bits();
let m0 = (f >> 31) as i32;
let mn = -m0;
let mask = (mn as u32) | 0x8000_0000u32;
f ^ mask
}
}
impl FromRadixKey for f32 {
#[inline]
fn from_radix_key(f: u32) -> Self {
let mask = (f >> 31).wrapping_sub(1) | 0x80000000;
f32::from_bits(f ^ mask)
}
}
impl ToRadixKey for u32 {
#[inline]
fn to_radix_key(&self) -> u32 {
*self
}
}
impl FromRadixKey for u32 {
#[inline]
fn from_radix_key(u: u32) -> Self { u }
}
impl ToRadixKey for u16 {
#[inline]
fn to_radix_key(&self) -> u32 {
*self as u32
}
}
impl FromRadixKey for u16 {
#[inline]
fn from_radix_key(u: u32) -> Self { u as u16 }
}
impl ToRadixKey for u8 {
#[inline]
fn to_radix_key(&self) -> u32 {
*self as u32
}
}
impl FromRadixKey for u8 {
#[inline]
fn from_radix_key(u: u32) -> Self { u as u8 }
}
impl ToRadixKey for i32 {
#[inline]
fn to_radix_key(&self) -> u32 {
(*self ^ i32::min_value()) as u32
}
}
impl FromRadixKey for i32 {
#[inline]
fn from_radix_key(u: u32) -> Self { ((u as i32) ^ i32::min_value()) }
}
impl ToRadixKey for i16 {
#[inline]
fn to_radix_key(&self) -> u32 {
(*self as i32).to_radix_key()
}
}
impl FromRadixKey for i16 {
#[inline]
fn from_radix_key(u: u32) -> Self { i32::from_radix_key(u) as i16 }
}
impl ToRadixKey for i8 {
#[inline]
fn to_radix_key(&self) -> u32 {
(*self as i32).to_radix_key()
}
}
impl FromRadixKey for i8 {
#[inline]
fn from_radix_key(u: u32) -> Self { i32::from_radix_key(u) as i8 }
}
// }
#[cfg(test)]
mod tests {
use super::*;
use rand::distributions::Standard;
use rand::prelude::*;
fn is_ascending<T: PartialOrd>(s: &[T]) -> bool {
if s.len() < 2 {
return true;
}
for i in 1..s.len() {
if s[i - 1] > s[i] {
return false;
}
}
true
}
#[test]
fn test_sort_lots() {
let mut r = thread_rng();
for &size in &[0, 1, 2, 3, 4, 5, 10, 100] {
for _ in 0..50 {
let buf: Vec<u32> = (0..size).map(|_| r.gen()).collect();
let mut seen = vec![false; buf.len()];
let mut sorted = vec![];
let mut sorter = RadixSorter::new();
for &(i, key) in sorter.sort(&buf) {
assert_eq!(key, buf[i]);
assert!(!seen[i]);
seen[i] = true;
sorted.push(buf[i]);
}
assert!(is_ascending(&sorted), "{:?}", sorted);
assert!(seen.iter().all(|&f| f));
}
}
}
fn test_sort_generic<T>()
where
T: ToRadixKey + PartialEq + PartialOrd + Clone + std::fmt::Debug,
Standard: Distribution<T>,
{
let mut r = thread_rng();
for _ in 0..10 {
let buf: Vec<T> = (0..500).map(|_| r.gen()).collect();
let mut sorted = vec![];
let mut seen = vec![false; buf.len()];
let mut sorter = RadixSorter::new();
for &(i, key) in sorter.sort(&buf) {
sorted.push(buf[i].clone());
assert!(!seen[i]);
seen[i] = true;
assert_eq!(<T as ToRadixKey>::to_radix_key(&buf[i]), key);
}
assert!(is_ascending(&sorted), "{:?}", sorted);
assert!(seen.iter().all(|&f| f));
}
}
fn test_sort_generic_dist<T, D>(d: D)
where
T: ToRadixKey + PartialEq + PartialOrd + Clone + std::fmt::Debug,
D: Distribution<T> + Copy,
{
let mut r = thread_rng();
for _ in 0..50 {
let buf: Vec<T> = (0..100).map(|_| r.sample(d)).collect();
let mut sorted = vec![];
let mut seen = vec![false; buf.len()];
let mut sorter = RadixSorter::new();
for &(i, key) in sorter.sort(&buf) {
sorted.push(buf[i].clone());
assert!(!seen[i]);
seen[i] = true;
assert_eq!(<T as ToRadixKey>::to_radix_key(&buf[i]), key);
}
assert!(is_ascending(&sorted), "{:?}", sorted);
assert!(seen.iter().all(|&f| f));
}
}
#[test]
fn test_sort_f32() {
test_sort_generic::<f32>();
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(-1.0, 1.0));
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(-100.0, -50.0));
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(50.0, 100.0));
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(1.0e5, 1.0e7));
test_sort_generic_dist::<f32, _>(rand::distributions::Uniform::new(-1.0e7, -1.0e5));
}
#[test]
fn test_sort_u32() {
test_sort_generic::<u32>();
}
#[test]
fn test_sort_i32() {
test_sort_generic::<i32>();
}
#[test]
fn test_sort_u16() {
test_sort_generic::<u16>();
}
#[test]
fn test_sort_i16() {
test_sort_generic::<i16>();
}
#[test]
fn test_sort_u8() {
test_sort_generic::<u8>();
}
#[test]
fn test_sort_i8() {
test_sort_generic::<i8>();
}
#[test]
fn test_sort_fulldupe() {
let buf = vec![40u32; 16000];
let mut seen = vec![false; buf.len()];
let mut sorter = RadixSorter::new();
for &(i, key) in sorter.sort(&buf) {
assert!(!seen[i]);
seen[i] = true;
assert!(key == 40u32);
}
assert!(seen.iter().all(|&f| f));
}
#[test]
fn test_sort_manydupe() {
let buf: Vec<u32> = (0u32..).take(9000).map(|i| (i % 3) + 30).collect();
let mut seen = vec![false; buf.len()];
let mut counts = [0usize; 3];
let mut sorted = vec![];
let mut sorter = RadixSorter::new();
for &(i, key) in sorter.sort(&buf) {
assert!(!seen[i]);
seen[i] = true;
assert!(key == buf[i]);
sorted.push(buf[i]);
counts[(buf[i] - 30) as usize] += 1;
}
assert!(is_ascending(&sorted), "{:?}", sorted);
assert!(seen.iter().all(|&f| f));
for i in buf {
counts[(i - 30) as usize] = counts[(i - 30) as usize].checked_sub(1).unwrap();
}
assert_eq!(counts, [0, 0, 0]);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment