Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/bin/bash
# number of buckets
declare -i nB=6
# values by key
declare -Ai Vk=(
[a]=3632376
[b]=1403349
[c]=1509800
[d]=837855
[e]=641015
[f]=1270371
[g]=485138
[h]=1233635
[i]=1990347
[j]=253057
[k]=223848
[l]=707307
[m]=1059271
[n]=581775
[o]=1794386
[p]=1131515
[q]=41518
[r]=867451
[s]=2095579
[t]=3873869
[u]=356153
[v]=228228
[w]=1569942
[x]=10700
[y]=139113
[z]=27041
)
# bare list of values
declare -a V=( "${Vk[@]}" ) # just the values
# number of values
declare -i nV=${#V[@]}
# sum of values (and of buckets)
declare -i S
for ((i=0;i<nV;++i)) do S+=V[i] ; done
# mean of values
declare -i Mv=S/nV
# mean per bucket
declare -i Mb=S/nB
# Resulting variance should be better than what you'd get by putting everything
# in one bucket...
declare -i BestV=S*Mb # == S*S/nB
declare -i i j p q v t
declare -a B=()
declare -ai T=()
declare -i Var
declare -a SortedA=( "${V[@]}" )
# Sort nV items from V[] into SortedA[]
for ((i=0;i<nV;++i)) do
for((j=i+1;j<nV;++j)) do
(( SortedA[i] < SortedA[j] )) && t=${SortedA[i]} SortedA[i]=${SortedA[j]} SortedA[j]=$t
done
done
declare -i G=4 # gamma fuzz-convergence factor
printf 'Initial test variance=%s\n' $BestV
for ((z=0;z<100000;++z)) do
(( SECONDS != oSECONDS && ( oSECONDS = SECONDS ) )) && printf '%u \r' $z
B=()
Tb=()
if (( ( mode = 0 ) == 0 )) ; then
g=-
# Randomize order of nV items in V[]
for ((i=0;i<nV;++i)) do
((j=RANDOM%nV))
t=${V[i]} V[i]=${V[j]} V[j]=$t
done
# Pack into nB buckets B[], and compute bucket totals into Tb[]
for ((i=0;i<nV;++i)) do
v=V[i]
# choose target bucket q
t=Tb[q=0]
for ((p=1;p<nB;++p)) do
((Tb[p]<t)) && (( t=Tb[q=p] ))
done
# add to bucket
((Tb[q]+=v))
B[q]+="+$v"
done
else
# Pack nV items from SortedA[] into nB buckets B[], and compute bucket totals into Tb[]
(( g = RANDOM % (G-1) + 1 ))
for ((i=0;i<nV;++i)) do
v=V[i]
# choose target bucket q
(( q = RANDOM % ( nB * ( nV - i + 1 )**g / nV**g + 1 ) ))
# add to bucket
((Tb[q]+=v))
B[q]+="+$v"
# re-sort nB buckets Tb[] & B[]
for (( p=q+1 ; p<nV && Tb[q] > Tb[p] ; ++p )) do :; done
if (( p>q+1 )) ; then
Tb=( "${Tb[@]:0:q}" "${Tb[@]:q+1:p-q-1}" "${Tb[@]:q:1}" "${Tb[@]:p}" )
B=( "${B[@]:0:q}" "${B[@]:q+1:p-q-1}" "${B[@]:q:1}" "${B[@]:p}" )
fi
done
fi
# Compute population variance between bucket totals.
# (Don't need squareroot for stddev, since they're only for comparison)
for (( Var=-Mb*Mb, p=0 ; p<nB ; ++p )) do
(( t=Tb[p], Var += t*t/nB ))
done
if ((Var >= 0 && Var < BestV)) ; then
((BestV=Var))
BestT=( "${Tb[@]}" )
BestO=( "${B[@]}" )
# StdDev = √Var, using Newton's square-root method
for (( StdDev=3**${#Var} ; StdDev**2 > Var || (StdDev+1)**2 < Var ; StdDev+=Var/StdDev, StdDev /= 2 )) do :;done
printf 'Improved: mode=%s gamma=%s var=%s stddev=±%s µ/σ=%s\n' "$mode" "$g" "$Var" "$StdDev" "$((Mb/StdDev))"
for ((p=0;p<nB;++p)) do
printf '\t%s=%s\n' "${Tb[p]}" "${B[p]#'+'}"
done
fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment