Last active
May 2, 2024 20:48
-
-
Save vurtun/7063afddcf1491af16037a207a167e49 to your computer and use it in GitHub Desktop.
print K largest(or smallest) elements in an array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Problem: Directory file view ui. Files can be sorted by different properties (name, size, ...). Ui itself only needs elements | |
that are currently in the visible section of the list view from x to y So I want to use a fixed size buffer with up to k elements | |
and walk through all files in the directory and only have the final elements between index x and y in the end in the fixed size buffer. | |
Temp buffers are fine for me as long as they have a fixed at compile time size. For those familiar with SQL basically this is | |
a SORT BY LIMIT begin, count. | |
Idea: Use Floyd–Rivest algorithm with average O(n) to find the x and y values. Walk over list again and fill fixed size k buffer with elements greater | |
than x and smaller than y. Then just sort the k values in k*log(k). So we have O(n) + k*log(k) | |
Problem: Find x and y is not directly possible since not all elements are in memory because we have only fixed size buffer. | |
Idea: Create a buffer of size 2𝑘. Read in 2𝑘 elements. Use Floyd–Rivest algorithm to partition the buffer | |
so that the 𝑘 smallest elements are first; this takes 𝑂(𝑘) time. Now read in another 𝑘 items from your array into the | |
buffer, replacing the 𝑘 largest items in the buffer, partition the buffer as before, and repeat. | |
Problem: k should be fixed size. So when we have k=256 we are not able to address any index greater than 256. | |
Idea: Multiple iterations in multiple of fixed k. So when k=256 then have index 256 initally and iterate over all n. Then | |
new iteration we skip all elements smaller than the element at k=256 and use a new index. Repeat until we hit the index | |
we want. | |
Problem: Kind of expensive. We get O(2*k * (n*0.5)/k) + k*log(k) | |
Optimizations: | |
- initially use heap with heapify up/down to sort first k elements for n*log(k) | |
- Instead of k being size of visible section use bigger window and only recalculate when reaching the end | |
- Jumps don't happen to often (shortcuts + shift-scrollbar-click) so reusing the elements for the | |
starting/ending element makes sense | |
- File count smaller than k only neeed O(k*log(k)) | |
- start/end indicies are limited to n/2 by walking backwards | |
*/ | |
// https://cs.stackexchange.com/questions/68125/finding-kth-smallest-element-from-a-given-sequence-only-with-ok-memory-on-t | |
// https://en.wikipedia.org/wiki/Floyd%E2%80%93Rivest_algorithm | |
#include <math.h> | |
#define heap_parent(i) (((i)-1) >> 1) | |
#define heap_right(i) (((i)+1) << 1) | |
#define heap_left(i) (heap_right(i)-1) | |
#define sgn(val) ((0 < val) - (val < 0)) | |
#define max(x,y) ((x<y)?y:x) | |
#define min(x,y) ((x<y)?x:y) | |
static inline void | |
swpi(void *a, void *b) { | |
int t = *(int*)a; | |
*(int*)a = *(int*)b; | |
*(int*)b = t; | |
} | |
static inline int | |
cmpid(const void *a, const void *b) { | |
return *(const int*)b - *(const int*)a; | |
} | |
static inline int | |
cmpiu(const void *a, const void *b) { | |
return *(const int*)a - *(const int*)b; | |
} | |
static inline void | |
heapifyd(int *a, int i, int n, | |
int(*cmp)(const void*, const void*), | |
void(*swp)(void *a, void *b)) { | |
while (n) { | |
int lo, l = heap_left(i); | |
int r = heap_right(i); | |
lo = (l < n && cmp(&a[l], &a[i]) < 0) ? l: i; | |
lo = (r < n && cmp(&a[r], &a[lo]) < 0) ? r: lo; | |
if (lo == i) { | |
return; | |
} | |
swp(&a[i], &a[lo]); | |
i = lo; | |
} | |
} | |
static inline void | |
heapifyu(int *a, int i, | |
int(*cmp)(const void*, const void*), | |
void(*swp)(void *a, void *b)) { | |
while (i && cmp(&a[heap_parent(i)], &a[i]) > 0) { | |
int p = heap_parent(i); | |
swp(&a[p], &a[i]); | |
i = p; | |
} | |
} | |
static int | |
partition(int a[], int l, int r, int k, | |
int(*cmp)(const void*, const void*), | |
void(*swp)(void *a, void *b)) { | |
while (r > l) { | |
if (r - l > 600) { | |
int n = r - l + 1; | |
int i = k - l + 1; | |
float z = logf(n); | |
float s = 0.5 * expf(2 * z / 3); | |
float sd = 0.5 * sqrtf(z * s * (n - s) / n) * sgn(i - n / 2); | |
int l2 = max(l, (int)(k - i * s / n + sd)); | |
int r2 = min(r, (int)(k + (n - i) * s / n + sd)); | |
partition(a, l2, r2, k, cmp, swp); | |
} | |
int t = a[k]; | |
int i = l; | |
int j = r; | |
swp(&a[l], &a[k]); | |
if (cmp(&a[r],&t) > 0) { | |
swp(&a[l], &a[r]); | |
} | |
while (i < j) { | |
swp(&a[i], &a[j]); | |
i++, j--; | |
for (;cmp(&a[i],&t)<0; ++i); | |
for (;cmp(&a[j],&t)>0; --j); | |
} | |
if (cmp(&a[l],&t) == 0) { | |
swp(&a[l], &a[j]); | |
} else { | |
swp(&a[r], &a[++j]); | |
} | |
if (j <= k) { | |
l = j + 1; | |
} | |
if (k <= j) { | |
r = j - 1; | |
} | |
} | |
return a[k]; | |
} | |
#include <stdio.h> | |
#include <stdlib.h> | |
extern int | |
main(void) { | |
int a[] = {1000, 400, 300, -10, 20, -50, 65, 47, 45, 50, 40, 75, 60, 100}; | |
int n = sizeof(a) / sizeof(a[0]); | |
int off = 3; | |
int k = 3; | |
int i = 0; | |
partition(a, 0, n-1, off, cmpiu, swpi); | |
for (int j = 0; j < n; ++j) { | |
printf("%d,", a[j]); | |
} | |
printf("\n"); | |
int lst[k], cnt = 0; | |
for (i = off; i < off + k; ++i) { | |
lst[cnt++] = a[i]; | |
heapifyu(lst, cnt-1, cmpid, swpi); | |
} | |
for (; i < n; ++i) { | |
if (a[i] < lst[0]) { | |
lst[0] = a[i]; | |
heapifyd(lst, 0, cnt, cmpid, swpi); | |
} | |
} | |
int res[k], num = 0; | |
while (cnt) { | |
res[k - ++num] = lst[0]; | |
lst[0] = lst[--cnt]; | |
heapifyd(lst, 0, cnt, cmpid, swpi); | |
} | |
printf("Smallest values: ["); | |
for (int i = 0; i < num; ++i) { | |
printf("%d,", res[i]); | |
} | |
printf("]\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment