jaseemabid/topk.rs

## topk.rs
/*! Top K elements of an unordered list

There are 3 ways to get the top k elements from an unsorted list.

1. Build a heap with all the elements and pop k times
2. Sort the whole list and return a slice from the end
3. Quick select k elements

Naively, I would expect 1 to be slowest, 2 to be faster and 3 the fastest.

Results:

```ignore
$ cargo bench topk

test topk::test::bench_heap     ... bench:   1,434,903 ns/iter (+/- 379,674)
test topk::test::bench_qselect  ... bench:      94,129 ns/iter (+/- 27,035)
test topk::test::bench_sort     ... bench:      60,324 ns/iter (+/- 12,335)
```

🤯🤯🤯🤯🤯

A slow heap is understandable since its doing a lot of additional work under the
hood. My implementation of quick select seems to be doing very close to the
minimal work required, but the stdlib sort is SO MUCH FASTER and well optimized,
that sorting beats both by a very good margin.

Great TIL.

*/

use std::collections::BinaryHeap;

// Top k elements with a heap
pub fn top_k_heap(nums: &[i32], k: i32) -> Vec<i32> {
    let mut heap = BinaryHeap::<i32>::new();

    for n in nums {
        heap.push(*n);
    }

    // NOTE: Heap iter() order is random, so that's not what we want here.
    (0..k).map(|_| heap.pop().unwrap()).collect()
}

// Top k elements with quick select
pub fn quickselect(nums: &mut [i32], k: i32) -> &[i32] {
    qselect(nums, 0, nums.len() - 1, k as usize)
}

// Sorts a (portion of an) array, divides it into partitions, then sorts those
//
// <https://en.wikipedia.org/wiki/Quicksort#Hoare_partition_scheme>
pub fn quicksort(nums: &mut [i32]) {
    qsort(nums, 0, nums.len() - 1)
}

fn qselect(nums: &mut [i32], lo: usize, hi: usize, k: usize) -> &[i32] {
    let boundary = nums.len() - k; // nums[boundary..] is the final result

    if lo >= hi {
        return &nums[nums.len() - k..];
    }

    let p = partition(nums, lo, hi);

    // Instead of partitioning inside the boundary, sort the last known subsection
    // with top k elements (lo..hi) inclusive and return sub range.
    if p >= boundary {
        qsort(nums, lo, nums.len() - 1);
        &nums[boundary..]
    } else if boundary < p {
        qselect(nums, lo, p - 1, k) // Go left
    } else {
        qselect(nums, p + 1, hi, k) // Go right
    }
}

fn qsort(nums: &mut [i32], lo: usize, hi: usize) {
    if lo < hi {
        let p = partition(nums, lo, hi);
        qsort(nums, lo, p); // Note: the pivot is now included
        qsort(nums, p + 1, hi);
    }
}

// Divides array into two partitions
fn partition(nums: &mut [i32], lo: usize, hi: usize) -> usize {
    let pivot = nums[(hi + lo).div_euclid(2)]; // The value in the middle of the array

    // The original partition scheme described by Tony Hoare uses two pointers
    // (indices into the range) that start at both ends of the array being
    // partitioned, then move toward each other, until they detect an inversion: a
    // pair of elements, one greater than the bound (Hoare's terms for the pivot
    // value) at the first pointer, and one less than the bound at the second
    // pointer.
    let mut i: i32 = lo as i32 - 1; // Left index. NOTE: this could be negative since i32 instead of usize
    let mut j: i32 = hi as i32 + 1; // Right index.

    loop {
        // Move the left index to the right at least once and while the element at
        // the left index is less than the pivot
        i += 1;
        while nums[i as usize] < pivot {
            i += 1;
        }

        // Move the right index to the left at least once and while the element at
        // the right index is greater than the pivot
        j -= 1;
        while nums[j as usize] > pivot {
            j -= 1;
        }

        // If the indices crossed, return
        if i >= j {
            return j as usize;
        }

        // Swap the elements at the left and right indices
        nums.swap(i as usize, j as usize)
    }
}

#[cfg(test)]
mod test {
    extern crate test;
    use {super::*, rand::prelude::random, std::i32, test::Bencher};

    fn rand(n: usize) -> Vec<i32> {
        (0..n).map(|_| random()).collect()
    }

    #[test]
    fn sort_simple() {
        let mut a = vec![0, 1, 2, 4, 1, 4, 9, 7, 0];
        let b = vec![0, 0, 1, 1, 2, 4, 4, 7, 9];

        quicksort(&mut a);
        assert_eq!(b, a);
    }

    #[test]
    fn sort_random() {
        let mut a = rand(100);
        let mut b = a.clone();

        b.sort_unstable();

        quicksort(&mut a);
        assert_eq!(b, a);
    }

    #[test]
    fn select_simple() {
        let mut a = vec![9, 0, 1, 2, 4, 1, 4, 9, 7, 0, 1, 2, 3];
        let len = a.len();

        assert_eq!(vec![7, 9, 9], qselect(&mut a, 0, len - 1, 3));
    }

    #[test]
    fn select_random() {
        let mut a = rand(100);
        let mut b = a.clone();
        let len = a.len();

        b.sort_unstable();

        assert_eq!(&b[95..100], qselect(&mut a, 0, len - 1, 5));
    }

    #[bench]
    fn bench_sort(b: &mut Bencher) {
        let mut a = rand(100_000);

        b.iter(|| {
            let l = a.len();
            assert_eq!(l, 100_000);
            a.sort_unstable();

            // Sum up the results to make sure its used. i32 would overflow and panic, so
            // cast to i64. Would be nice if I could do this without an extra
            // map.
            a[l - 5..].iter().map(|i| *i as i64).sum::<i64>()
        });

        test::black_box(());
    }

    #[bench]
    fn bench_qselect(b: &mut Bencher) {
        let mut a = rand(100_000);

        b.iter(|| {
            let r = quickselect(&mut a, 5);
            r.iter().map(|i| *i as i64).sum::<i64>()
        });

        test::black_box(());
    }

    #[bench]
    fn bench_heap(b: &mut Bencher) {
        let a = rand(100_000);

        b.iter(|| {
            let r = top_k_heap(&a, 5);
            r.iter().map(|i| *i as i64).sum::<i64>()
        });

        test::black_box(());
    }
}
	/*! Top K elements of an unordered list

	There are 3 ways to get the top k elements from an unsorted list.

	1. Build a heap with all the elements and pop k times
	2. Sort the whole list and return a slice from the end
	3. Quick select k elements

	Naively, I would expect 1 to be slowest, 2 to be faster and 3 the fastest.

	Results:

	```ignore
	$ cargo bench topk

	test topk::test::bench_heap ... bench: 1,434,903 ns/iter (+/- 379,674)
	test topk::test::bench_qselect ... bench: 94,129 ns/iter (+/- 27,035)
	test topk::test::bench_sort ... bench: 60,324 ns/iter (+/- 12,335)
	```

	🤯🤯🤯🤯🤯

	A slow heap is understandable since its doing a lot of additional work under the
	hood. My implementation of quick select seems to be doing very close to the
	minimal work required, but the stdlib sort is SO MUCH FASTER and well optimized,
	that sorting beats both by a very good margin.

	Great TIL.

	*/

	use std::collections::BinaryHeap;

	// Top k elements with a heap
	pub fn top_k_heap(nums: &[i32], k: i32) -> Vec<i32> {
	let mut heap = BinaryHeap::<i32>::new();

	for n in nums {
	heap.push(*n);
	}

	// NOTE: Heap iter() order is random, so that's not what we want here.
	(0..k).map(\|_\| heap.pop().unwrap()).collect()
	}

	// Top k elements with quick select
	pub fn quickselect(nums: &mut [i32], k: i32) -> &[i32] {
	qselect(nums, 0, nums.len() - 1, k as usize)
	}

	// Sorts a (portion of an) array, divides it into partitions, then sorts those
	//
	// <https://en.wikipedia.org/wiki/Quicksort#Hoare_partition_scheme>
	pub fn quicksort(nums: &mut [i32]) {
	qsort(nums, 0, nums.len() - 1)
	}

	fn qselect(nums: &mut [i32], lo: usize, hi: usize, k: usize) -> &[i32] {
	let boundary = nums.len() - k; // nums[boundary..] is the final result

	if lo >= hi {
	return &nums[nums.len() - k..];
	}

	let p = partition(nums, lo, hi);

	// Instead of partitioning inside the boundary, sort the last known subsection
	// with top k elements (lo..hi) inclusive and return sub range.
	if p >= boundary {
	qsort(nums, lo, nums.len() - 1);
	&nums[boundary..]
	} else if boundary < p {
	qselect(nums, lo, p - 1, k) // Go left
	} else {
	qselect(nums, p + 1, hi, k) // Go right
	}
	}

	fn qsort(nums: &mut [i32], lo: usize, hi: usize) {
	if lo < hi {
	let p = partition(nums, lo, hi);
	qsort(nums, lo, p); // Note: the pivot is now included
	qsort(nums, p + 1, hi);
	}
	}

	// Divides array into two partitions
	fn partition(nums: &mut [i32], lo: usize, hi: usize) -> usize {
	let pivot = nums[(hi + lo).div_euclid(2)]; // The value in the middle of the array

	// The original partition scheme described by Tony Hoare uses two pointers
	// (indices into the range) that start at both ends of the array being
	// partitioned, then move toward each other, until they detect an inversion: a
	// pair of elements, one greater than the bound (Hoare's terms for the pivot
	// value) at the first pointer, and one less than the bound at the second
	// pointer.
	let mut i: i32 = lo as i32 - 1; // Left index. NOTE: this could be negative since i32 instead of usize
	let mut j: i32 = hi as i32 + 1; // Right index.

	loop {
	// Move the left index to the right at least once and while the element at
	// the left index is less than the pivot
	i += 1;
	while nums[i as usize] < pivot {
	i += 1;
	}

	// Move the right index to the left at least once and while the element at
	// the right index is greater than the pivot
	j -= 1;
	while nums[j as usize] > pivot {
	j -= 1;
	}

	// If the indices crossed, return
	if i >= j {
	return j as usize;
	}

	// Swap the elements at the left and right indices
	nums.swap(i as usize, j as usize)
	}
	}

	#[cfg(test)]
	mod test {
	extern crate test;
	use {super::*, rand::prelude::random, std::i32, test::Bencher};

	fn rand(n: usize) -> Vec<i32> {
	(0..n).map(\|_\| random()).collect()
	}

	#[test]
	fn sort_simple() {
	let mut a = vec![0, 1, 2, 4, 1, 4, 9, 7, 0];
	let b = vec![0, 0, 1, 1, 2, 4, 4, 7, 9];

	quicksort(&mut a);
	assert_eq!(b, a);
	}

	#[test]
	fn sort_random() {
	let mut a = rand(100);
	let mut b = a.clone();

	b.sort_unstable();

	quicksort(&mut a);
	assert_eq!(b, a);
	}

	#[test]
	fn select_simple() {
	let mut a = vec![9, 0, 1, 2, 4, 1, 4, 9, 7, 0, 1, 2, 3];
	let len = a.len();

	assert_eq!(vec![7, 9, 9], qselect(&mut a, 0, len - 1, 3));
	}

	#[test]
	fn select_random() {
	let mut a = rand(100);
	let mut b = a.clone();
	let len = a.len();

	b.sort_unstable();

	assert_eq!(&b[95..100], qselect(&mut a, 0, len - 1, 5));
	}

	#[bench]
	fn bench_sort(b: &mut Bencher) {
	let mut a = rand(100_000);

	b.iter(\|\| {
	let l = a.len();
	assert_eq!(l, 100_000);
	a.sort_unstable();

	// Sum up the results to make sure its used. i32 would overflow and panic, so
	// cast to i64. Would be nice if I could do this without an extra
	// map.
	a[l - 5..].iter().map(\|i\| *i as i64).sum::<i64>()
	});

	test::black_box(());
	}

	#[bench]
	fn bench_qselect(b: &mut Bencher) {
	let mut a = rand(100_000);

	b.iter(\|\| {
	let r = quickselect(&mut a, 5);
	r.iter().map(\|i\| *i as i64).sum::<i64>()
	});

	test::black_box(());
	}

	#[bench]
	fn bench_heap(b: &mut Bencher) {
	let a = rand(100_000);

	b.iter(\|\| {
	let r = top_k_heap(&a, 5);
	r.iter().map(\|i\| *i as i64).sum::<i64>()
	});

	test::black_box(());
	}
	}