Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active May 2, 2024 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vurtun/7063afddcf1491af16037a207a167e49 to your computer and use it in GitHub Desktop.
Save vurtun/7063afddcf1491af16037a207a167e49 to your computer and use it in GitHub Desktop.
print K largest(or smallest) elements in an array
/* Problem: Directory file view ui. Files can be sorted by different properties (name, size, ...). Ui itself only needs elements
that are currently in the visible section of the list view from x to y So I want to use a fixed size buffer with up to k elements
and walk through all files in the directory and only have the final elements between index x and y in the end in the fixed size buffer.
Temp buffers are fine for me as long as they have a fixed at compile time size. For those familiar with SQL basically this is
a SORT BY LIMIT begin, count.
Idea: Use Floyd–Rivest algorithm with average O(n) to find the x and y values. Walk over list again and fill fixed size k buffer with elements greater
than x and smaller than y. Then just sort the k values in k*log(k). So we have O(n) + k*log(k)
Problem: Find x and y is not directly possible since not all elements are in memory because we have only fixed size buffer.
Idea: Create a buffer of size 2𝑘. Read in 2𝑘 elements. Use Floyd–Rivest algorithm to partition the buffer
so that the 𝑘 smallest elements are first; this takes 𝑂(𝑘) time. Now read in another 𝑘 items from your array into the
buffer, replacing the 𝑘 largest items in the buffer, partition the buffer as before, and repeat.
Problem: k should be fixed size. So when we have k=256 we are not able to address any index greater than 256.
Idea: Multiple iterations in multiple of fixed k. So when k=256 then have index 256 initally and iterate over all n. Then
new iteration we skip all elements smaller than the element at k=256 and use a new index. Repeat until we hit the index
we want.
Problem: Kind of expensive. We get O(2*k * (n*0.5)/k) + k*log(k)
Optimizations:
- initially use heap with heapify up/down to sort first k elements for n*log(k)
- Instead of k being size of visible section use bigger window and only recalculate when reaching the end
- Jumps don't happen to often (shortcuts + shift-scrollbar-click) so reusing the elements for the
starting/ending element makes sense
- File count smaller than k only neeed O(k*log(k))
- start/end indicies are limited to n/2 by walking backwards
*/
// https://cs.stackexchange.com/questions/68125/finding-kth-smallest-element-from-a-given-sequence-only-with-ok-memory-on-t
// https://en.wikipedia.org/wiki/Floyd%E2%80%93Rivest_algorithm
#include <math.h>
#define heap_parent(i) (((i)-1) >> 1)
#define heap_right(i) (((i)+1) << 1)
#define heap_left(i) (heap_right(i)-1)
#define sgn(val) ((0 < val) - (val < 0))
#define max(x,y) ((x<y)?y:x)
#define min(x,y) ((x<y)?x:y)
static inline void
swpi(void *a, void *b) {
int t = *(int*)a;
*(int*)a = *(int*)b;
*(int*)b = t;
}
static inline int
cmpid(const void *a, const void *b) {
return *(const int*)b - *(const int*)a;
}
static inline int
cmpiu(const void *a, const void *b) {
return *(const int*)a - *(const int*)b;
}
static inline void
heapifyd(int *a, int i, int n,
int(*cmp)(const void*, const void*),
void(*swp)(void *a, void *b)) {
while (n) {
int lo, l = heap_left(i);
int r = heap_right(i);
lo = (l < n && cmp(&a[l], &a[i]) < 0) ? l: i;
lo = (r < n && cmp(&a[r], &a[lo]) < 0) ? r: lo;
if (lo == i) {
return;
}
swp(&a[i], &a[lo]);
i = lo;
}
}
static inline void
heapifyu(int *a, int i,
int(*cmp)(const void*, const void*),
void(*swp)(void *a, void *b)) {
while (i && cmp(&a[heap_parent(i)], &a[i]) > 0) {
int p = heap_parent(i);
swp(&a[p], &a[i]);
i = p;
}
}
static int
partition(int a[], int l, int r, int k,
int(*cmp)(const void*, const void*),
void(*swp)(void *a, void *b)) {
while (r > l) {
if (r - l > 600) {
int n = r - l + 1;
int i = k - l + 1;
float z = logf(n);
float s = 0.5 * expf(2 * z / 3);
float sd = 0.5 * sqrtf(z * s * (n - s) / n) * sgn(i - n / 2);
int l2 = max(l, (int)(k - i * s / n + sd));
int r2 = min(r, (int)(k + (n - i) * s / n + sd));
partition(a, l2, r2, k, cmp, swp);
}
int t = a[k];
int i = l;
int j = r;
swp(&a[l], &a[k]);
if (cmp(&a[r],&t) > 0) {
swp(&a[l], &a[r]);
}
while (i < j) {
swp(&a[i], &a[j]);
i++, j--;
for (;cmp(&a[i],&t)<0; ++i);
for (;cmp(&a[j],&t)>0; --j);
}
if (cmp(&a[l],&t) == 0) {
swp(&a[l], &a[j]);
} else {
swp(&a[r], &a[++j]);
}
if (j <= k) {
l = j + 1;
}
if (k <= j) {
r = j - 1;
}
}
return a[k];
}
#include <stdio.h>
#include <stdlib.h>
extern int
main(void) {
int a[] = {1000, 400, 300, -10, 20, -50, 65, 47, 45, 50, 40, 75, 60, 100};
int n = sizeof(a) / sizeof(a[0]);
int off = 3;
int k = 3;
int i = 0;
partition(a, 0, n-1, off, cmpiu, swpi);
for (int j = 0; j < n; ++j) {
printf("%d,", a[j]);
}
printf("\n");
int lst[k], cnt = 0;
for (i = off; i < off + k; ++i) {
lst[cnt++] = a[i];
heapifyu(lst, cnt-1, cmpid, swpi);
}
for (; i < n; ++i) {
if (a[i] < lst[0]) {
lst[0] = a[i];
heapifyd(lst, 0, cnt, cmpid, swpi);
}
}
int res[k], num = 0;
while (cnt) {
res[k - ++num] = lst[0];
lst[0] = lst[--cnt];
heapifyd(lst, 0, cnt, cmpid, swpi);
}
printf("Smallest values: [");
for (int i = 0; i < num; ++i) {
printf("%d,", res[i]);
}
printf("]\n");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment