Created
September 6, 2021 21:05
-
-
Save maxsei/5f95f7dfe0d14556ed90384424a2fa8a to your computer and use it in GitHub Desktop.
Failed attemptet to calculate rolling median in golang. I tried using a heap. I also tried using a two slices: one representing a sorted index of the current slice and the other representing the actual sorted order with index reference to the sorted index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"math" | |
"sort" | |
"strings" | |
) | |
func main() { | |
var data = []float64{ | |
// 7, 2, 5, 3, 4, | |
// 3, 5, 0, 8, 3, | |
67, 92, 82, 75, 66, 93, 66, 76, 68, 90, 90, 59, 60, 51, 62, 90, 74, 69, 69, 61, | |
// 0.87914651, 0.29610294, 0.61987944, 0.85705143, 0.34293991, | |
// 0.55476114, 0.69043374, 0.22326187, 0.62599817, 0.29401568, | |
// 0.75597507, 0.82290709, 0.93483924, 0.25436015, 0.27924857, | |
// 0.64780617, 0.80416631, 0.14345783, 0.52980476, 0.87262491, | |
// 0.012934, 0.5410165, 0.65216262, 0.481543, 0.45271674, | |
// 0.29958247, 0.72369019, 0.8504142, 0.60650678, 0.17290565, | |
// 0.21692281, 0.92410626, 0.69764974, 0.19414343, 0.14972919, | |
// 0.94714216, 0.86078896, 0.43152306, 0.67743273, 0.9666571, | |
} | |
// fmt.Println("[NaN NaN NaN NaN 4 3 4 3 4 3]") | |
fmt.Println("[NaN NaN NaN NaN 75 82 75 75 68 76 76 76 68 60 60 60 62 69 69 69]") | |
// RollingMedian(data, 3) | |
result := RollingMedian2(data, 5) | |
fmt.Printf("result = %+v\n", result) | |
fmt.Printf("len(result) = %+v\n", len(result)) | |
fmt.Printf("cap(result) = %+v\n", cap(result)) | |
// RollingMedian2(data, 10) | |
// fmt.Printf("RollingMedian(data) = %+v\n", RollingMedian(data, 4)) | |
} | |
type Leaf struct { | |
value *float64 | |
left, right *Leaf | |
} | |
func (l *Leaf) SearchFirstDepth(predicate func(tgt *Leaf) *Leaf) { | |
cursor := l | |
for cursor != nil { | |
cursor = predicate(cursor) | |
} | |
} | |
func (l *Leaf) Insert(newLeaf *Leaf) { | |
// | newLeaf? | left | newLeaf? | middle | newLeaf? | right | newLeaf? | | |
l.SearchFirstDepth(func(tgt *Leaf) *Leaf { | |
if *newLeaf.value < *tgt.value { | |
if tgt.left == nil { | |
tgt.left = newLeaf | |
return nil | |
} | |
if *tgt.left.value < *newLeaf.value { | |
tmp := tgt.left | |
tgt.left = newLeaf | |
newLeaf = tmp | |
return tgt.left | |
} | |
return tgt.left | |
} else { | |
if tgt.right == nil { | |
tgt.right = newLeaf | |
return nil | |
} | |
if *newLeaf.value < *tgt.right.value { | |
tmp := tgt.right | |
tgt.right = newLeaf | |
newLeaf = tmp | |
return tgt.right | |
} | |
return tgt.right | |
} | |
}) | |
} | |
func (l *Leaf) MermaidString() string { | |
connections := []string{"graph TD", fmt.Sprintf("\tsubgraph Root %p", l)} | |
var searchFunc func(tgt *Leaf) *Leaf | |
searchFunc = func(tgt *Leaf) *Leaf { | |
lr := []*Leaf{tgt.left, tgt.right} | |
const lrStr = "leftright" | |
for i := range lr { | |
if lr[i] == nil { | |
continue | |
} | |
connections = append(connections, fmt.Sprintf( | |
"\t%p[%p %f] -->|%s| %p[%p %f]", | |
tgt, | |
tgt, | |
*tgt.value, | |
lrStr[i*4:i*4+5-(1-i)], | |
lr[i], | |
lr[i], | |
*lr[i].value), | |
) | |
lr[i].SearchFirstDepth(searchFunc) | |
} | |
return nil | |
} | |
l.SearchFirstDepth(searchFunc) | |
connections = append(connections, "\tend") | |
return strings.Join(connections, "\n") | |
} | |
func (l *Leaf) TreeString() string { | |
addr := fmt.Sprintf("%p", l) | |
var lines []string | |
const sp string = " " | |
if l.right != nil { | |
lines = strings.Split(l.right.TreeString(), "\n") | |
lines[0] = addr + " ---- " + lines[0] | |
for i := 1; i < len(lines); i++ { | |
lines[i] = "|" + sp + " " + lines[i] | |
} | |
} | |
if len(lines) == 0 { | |
lines = append(lines, addr) | |
} else { | |
lines = append(lines, "| "+sp) | |
} | |
if l.left != nil { | |
linesRight := strings.Split(l.left.TreeString(), "\n") | |
lines = append(lines, linesRight...) | |
} | |
result := strings.Join(lines, "\n") | |
return result | |
} | |
func RollingMedian(data []float64, m int) []float64 { | |
// Set up Tree. | |
leaves := make([]Leaf, m) | |
// Assign leaf data. | |
for i := 0; i < m; i++ { | |
leaves[i].value = &data[i] | |
} | |
// Sort Leaves. | |
for i := 0; i < len(leaves); i++ { | |
for j := 0; j < len(leaves); j++ { | |
if i == j { | |
continue | |
} | |
if (*leaves[i].value) >= (*leaves[j].value) { | |
continue | |
} | |
tmp := *leaves[i].value | |
*leaves[i].value = *leaves[j].value | |
*leaves[j].value = tmp | |
} | |
} | |
// var buildTree func(leaf *Leaf, a, b int) | |
// buildTree = func(leaf *Leaf, a, b int) { | |
// middle := (a + b - 1) / 2 | |
// { | |
// middleLeft := (middle + a) / 2 | |
// if leaf != &leaves[middleLeft] { | |
// leaf.left = &leaves[middleLeft] | |
// buildTree(leaf.left, a, middle) | |
// } | |
// } | |
// { | |
// middleRight := (middle + b) / 2 | |
// if leaf != &leaves[middleRight] { | |
// leaf.right = &leaves[middleRight] | |
// buildTree(leaf.right, middle+1, b) | |
// } | |
// } | |
// } | |
root := &leaves[m/2-1] | |
// root := &leaves[2] | |
for i := range leaves { | |
if &leaves[i] == root { | |
continue | |
} | |
fmt.Printf("inserting %p %f into: \n", &leaves[i], *leaves[i].value) | |
fmt.Println(root.MermaidString()) | |
fmt.Println() | |
root.Insert(&leaves[i]) | |
} | |
fmt.Println(root.MermaidString()) | |
// buildTree(root, 0, m) | |
return nil | |
// fmt.Println(root.TreeString()) | |
fmt.Println() | |
for i, v := range leaves { | |
// fmt.Println(&v, v, *v.value) | |
fmt.Printf("%p %v, %v\n", &leaves[i], v, *v.value) | |
} | |
fmt.Println() | |
var result []float64 | |
for i := range data[:len(data)-m] { | |
var replace func(oldV, newV *float64, leaf *Leaf) | |
replace = func(oldV, newV *float64, leaf *Leaf) { | |
// Base case. | |
if leaf == nil { | |
return | |
} | |
// Replace. | |
if oldV == leaf.value { | |
gtLeft := true | |
if leaf.left != nil { | |
gtLeft = *newV > *leaf.left.value | |
} | |
ltRight := true | |
if leaf.right != nil { | |
ltRight = *newV < *leaf.right.value | |
} | |
if gtLeft && ltRight { | |
fmt.Printf("replacing %v with %v\n", leaf.value, newV) | |
leaf.value = newV | |
return | |
} | |
if gtLeft { | |
fmt.Printf("replacing %v with %v\n", leaf.value, leaf.right.value) | |
leaf.value = leaf.right.value | |
replace(leaf.value, newV, leaf.right) | |
return | |
} | |
if ltRight { | |
fmt.Printf("replacing %v with %v\n", leaf.value, leaf.left.value) | |
leaf.value = leaf.left.value | |
replace(leaf.left.value, newV, leaf.left) | |
return | |
} | |
} | |
// Keep Searching. | |
if *oldV < *leaf.value { | |
replace(oldV, newV, leaf.left) | |
} else { | |
replace(oldV, newV, leaf.right) | |
} | |
} | |
fmt.Printf("replace %v with %v\n", &data[i], &data[i+m]) | |
replace(&data[i], &data[i+m], root) | |
fmt.Println(root, *root.value) | |
fmt.Println(FindMedian(data[i : i+m+1])) | |
// fmt.Println(root, *root.value) | |
fmt.Println() | |
// fmt.Println(root, *root.value) | |
// fmt.Println("done", "\n") | |
// fmt.Printf("data[i:i+m] = %+v\n", data[i:i+m+1]) | |
if i > 8 { | |
break | |
} | |
} | |
return result | |
} | |
func NewHeapIndexer(n int) *HeapIndexer { | |
index := make([]int, n) | |
for i := range index { | |
index[i] = i | |
} | |
return &HeapIndexer{ | |
start: 0, | |
Index: index, | |
} | |
} | |
type HeapIndexer struct { | |
start int | |
Index []int | |
} | |
func (h *HeapIndexer) String() string { | |
var result string | |
for i, v := range h.Index { | |
if i == h.start { | |
result += "*" | |
} | |
result += fmt.Sprintf("%v ", v) | |
} | |
return fmt.Sprintf("[%s]", result[:len(result)-1]) | |
} | |
func (h *HeapIndexer) indexer(i int) int { | |
_ = h.Index[i] | |
return (i + h.start) % | |
len(h.Index) | |
} | |
func (h *HeapIndexer) Start() int { return h.start } | |
func (h *HeapIndexer) Get(i int) int { return h.Index[h.indexer(i)] } | |
func (h *HeapIndexer) Set(i, v int) { h.Index[h.indexer(i)] = v } | |
func (h *HeapIndexer) View(data []float64) []float64 { | |
if len(data) != len(h.Index) { | |
panic("must be same length as heap") | |
} | |
result := make([]float64, len(data)) | |
for i := range h.Index { | |
j := h.Get(i) | |
result[j] = data[i] | |
} | |
return result | |
} | |
func (h *HeapIndexer) Push(v int) { | |
h.Set(0, v) | |
h.start++ | |
} | |
func RollingMedian2(data []float64, m int) []float64 { | |
fmt.Printf("data = %+v\n", data) | |
A := make([]int, m) | |
B := NewHeapIndexer(m) | |
for i := range A { | |
A[i] = i | |
} | |
for i := 0; i < len(A); i++ { | |
for j := 0; j < len(A); j++ { | |
if i == j { | |
continue | |
} | |
if !(data[A[i]] < data[A[j]]) { | |
continue | |
} | |
tmp := A[i] | |
A[i] = A[j] | |
A[j] = tmp | |
} | |
} | |
for i, v := range A { | |
B.Set(v, i) | |
} | |
replace := func(x float64, idxRemoveB int, data []float64) { | |
// idxRemoveB = 0 | |
idxRemove := B.Get(0) | |
idxInsert := sort.Search(m, func(i int) bool { | |
i = (A[i] - B.Start()) % len(B.Index) | |
if i < 0 { | |
i *= -1 | |
} | |
return x < data[A[i]] | |
}) | |
// defer func() { B.Push(idxInsert) }() | |
if data[idxRemove] < x { | |
idxInsert-- | |
} | |
copyDebug := func(a, b, c, d int) { | |
src := A[a:b] | |
dst := A[c:d] | |
copy(dst, src) | |
} | |
sign := 1 | |
copyFunc := func() {} | |
if idxRemove < idxInsert { | |
sign = -1 | |
copyFunc = func() { | |
copyDebug( | |
idxRemove+1, idxInsert+1, | |
idxRemove, idxInsert, | |
) | |
} | |
} | |
if idxRemove > idxInsert { | |
copyFunc = func() { | |
copyDebug( | |
idxInsert, idxRemove, | |
idxInsert+1, idxRemove+1, | |
) | |
} | |
} | |
for i := idxRemove - sign; i != (idxInsert - sign); i -= sign { | |
B.Index[A[i]] = B.Index[A[i]] + sign | |
// B.Set(A[i], B.Index[A[i]]+sign) | |
} | |
// B.Set(A[idxInsert], B.Get(A[idxInsert])+sign) | |
copyFunc() | |
A[idxInsert] = idxRemoveB | |
B.Push(idxInsert) | |
} | |
middleIdx := m / 2 | |
calcMedian := func(data []float64) float64 { | |
i := (A[middleIdx] - B.Start()) % len(B.Index) | |
if i < 0 { | |
i *= -1 | |
} | |
// return data[B.Get(middleIdx)] | |
fmt.Println(i) | |
return data[i] | |
} | |
// Add NaN values to the result. | |
result := make([]float64, 0, len(data)) | |
for i := 0; i < m-1; i++ { | |
result = append(result, math.NaN()) | |
} | |
// Calculate rolling median. | |
for i := 0; i < len(data)-m; i++ { | |
sl := data[i : i+m] | |
result = append(result, calcMedian(sl)) | |
prev := B.View(sl) | |
replace(data[i+m], B.Start(), sl) | |
fmt.Printf("rpl:%v ins:%v %+v -> %+v\n", data[i], data[i+m], prev, B.View(data[i+1:i+m+1])) | |
} | |
sl := data[len(data)-m:] | |
fmt.Printf("B.View(sl) = %+v\n", B.View(sl)) | |
result = append(result, calcMedian(sl)) | |
return result | |
} | |
func FindMedian(xx []float64) float64 { | |
xxCopy := make([]float64, len(xx)) | |
copy(xxCopy, xx) | |
fmt.Println(xxCopy) | |
for i := 0; i < len(xxCopy); i++ { | |
for j := 0; j < len(xxCopy); j++ { | |
if i == j { | |
continue | |
} | |
if xxCopy[i] < xxCopy[j] { | |
continue | |
} | |
tmp := xxCopy[i] | |
xxCopy[i] = xxCopy[j] | |
xxCopy[j] = tmp | |
} | |
} | |
if len(xxCopy)%2 == 0 { | |
return xxCopy[len(xxCopy)/2-1] | |
} | |
return xxCopy[len(xxCopy)/2] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment