kurahaupo/gist:ac6b66cfc3c4deda378b0b9798d13c2e

## gistfile1.txt
#!/bin/bash

# number of buckets
declare -i nB=6

# values by key
declare -Ai Vk=(
    [a]=3632376
    [b]=1403349
    [c]=1509800
    [d]=837855
    [e]=641015
    [f]=1270371
    [g]=485138
    [h]=1233635
    [i]=1990347
    [j]=253057
    [k]=223848
    [l]=707307
    [m]=1059271
    [n]=581775
    [o]=1794386
    [p]=1131515
    [q]=41518
    [r]=867451
    [s]=2095579
    [t]=3873869
    [u]=356153
    [v]=228228
    [w]=1569942
    [x]=10700
    [y]=139113
    [z]=27041
)

# bare list of values
declare -a V=( "${Vk[@]}" ) # just the values

# number of values
declare -i nV=${#V[@]}

# sum of values (and of buckets)
declare -i S
for ((i=0;i<nV;++i)) do S+=V[i] ; done

# mean of values
declare -i Mv=S/nV

# mean per bucket
declare -i Mb=S/nB

# Resulting variance should be better than what you'd get by putting everything
# in one bucket...
declare -i BestV=S*Mb       # == S*S/nB

declare -i i j p q v t

declare -a  B=()
declare -ai T=()
declare  -i Var

declare -a  SortedA=( "${V[@]}" )

# Sort nV items from V[] into SortedA[]
for ((i=0;i<nV;++i)) do
    for((j=i+1;j<nV;++j)) do
        (( SortedA[i] < SortedA[j] )) && t=${SortedA[i]} SortedA[i]=${SortedA[j]} SortedA[j]=$t
    done
done

declare -i G=4  # gamma fuzz-convergence factor

printf 'Initial test variance=%s\n' $BestV

for ((z=0;z<100000;++z)) do

    (( SECONDS != oSECONDS && ( oSECONDS = SECONDS ) )) && printf '%u  \r' $z

    B=()
    Tb=()

    if (( ( mode = 0 ) == 0 )) ; then
        g=-
        # Randomize order of nV items in V[]
        for ((i=0;i<nV;++i)) do
            ((j=RANDOM%nV))
            t=${V[i]} V[i]=${V[j]} V[j]=$t
        done

        # Pack into nB buckets B[], and compute bucket totals into Tb[]
        for ((i=0;i<nV;++i)) do
            v=V[i]

            # choose target bucket q
            t=Tb[q=0]
            for ((p=1;p<nB;++p)) do
                ((Tb[p]<t)) && (( t=Tb[q=p] ))
            done

            # add to bucket
            ((Tb[q]+=v))
            B[q]+="+$v"
        done

    else

        # Pack nV items from SortedA[] into nB buckets B[], and compute bucket totals into Tb[]
        (( g = RANDOM % (G-1) + 1 ))
        for ((i=0;i<nV;++i)) do
            v=V[i]

            # choose target bucket q
            (( q = RANDOM % ( nB * ( nV - i + 1 )**g / nV**g + 1 ) ))

            # add to bucket
            ((Tb[q]+=v))
            B[q]+="+$v"

            # re-sort nB buckets Tb[] & B[]
            for (( p=q+1 ; p<nV && Tb[q] > Tb[p] ; ++p )) do :; done
            if (( p>q+1 )) ; then
                Tb=( "${Tb[@]:0:q}" "${Tb[@]:q+1:p-q-1}" "${Tb[@]:q:1}" "${Tb[@]:p}" )
                B=( "${B[@]:0:q}" "${B[@]:q+1:p-q-1}" "${B[@]:q:1}" "${B[@]:p}" )
            fi
        done
    fi


    # Compute population variance between bucket totals.
    # (Don't need squareroot for stddev, since they're only for comparison)
    for (( Var=-Mb*Mb, p=0 ; p<nB ; ++p )) do
        (( t=Tb[p], Var += t*t/nB ))
    done

    if ((Var >= 0 && Var < BestV)) ; then
        ((BestV=Var))
        BestT=( "${Tb[@]}" )
        BestO=( "${B[@]}" )

        # StdDev = √Var, using Newton's square-root method
        for (( StdDev=3**${#Var} ; StdDev**2 > Var || (StdDev+1)**2 < Var ; StdDev+=Var/StdDev, StdDev /= 2 )) do :;done

        printf 'Improved: mode=%s gamma=%s var=%s stddev=±%s µ/σ=%s\n' "$mode" "$g" "$Var" "$StdDev" "$((Mb/StdDev))"
        for ((p=0;p<nB;++p)) do
            printf '\t%s=%s\n' "${Tb[p]}" "${B[p]#'+'}"
        done
    fi
done
	#!/bin/bash

	# number of buckets
	declare -i nB=6

	# values by key
	declare -Ai Vk=(
	[a]=3632376
	[b]=1403349
	[c]=1509800
	[d]=837855
	[e]=641015
	[f]=1270371
	[g]=485138
	[h]=1233635
	[i]=1990347
	[j]=253057
	[k]=223848
	[l]=707307
	[m]=1059271
	[n]=581775
	[o]=1794386
	[p]=1131515
	[q]=41518
	[r]=867451
	[s]=2095579
	[t]=3873869
	[u]=356153
	[v]=228228
	[w]=1569942
	[x]=10700
	[y]=139113
	[z]=27041
	)

	# bare list of values
	declare -a V=( "${Vk[@]}" ) # just the values

	# number of values
	declare -i nV=${#V[@]}

	# sum of values (and of buckets)
	declare -i S
	for ((i=0;i<nV;++i)) do S+=V[i] ; done

	# mean of values
	declare -i Mv=S/nV

	# mean per bucket
	declare -i Mb=S/nB

	# Resulting variance should be better than what you'd get by putting everything
	# in one bucket...
	declare -i BestV=SMb # == SS/nB

	declare -i i j p q v t

	declare -a B=()
	declare -ai T=()
	declare -i Var

	declare -a SortedA=( "${V[@]}" )

	# Sort nV items from V[] into SortedA[]
	for ((i=0;i<nV;++i)) do
	for((j=i+1;j<nV;++j)) do
	(( SortedA[i] < SortedA[j] )) && t=${SortedA[i]} SortedA[i]=${SortedA[j]} SortedA[j]=$t
	done
	done

	declare -i G=4 # gamma fuzz-convergence factor

	printf 'Initial test variance=%s\n' $BestV

	for ((z=0;z<100000;++z)) do

	(( SECONDS != oSECONDS && ( oSECONDS = SECONDS ) )) && printf '%u \r' $z

	B=()
	Tb=()

	if (( ( mode = 0 ) == 0 )) ; then
	g=-
	# Randomize order of nV items in V[]
	for ((i=0;i<nV;++i)) do
	((j=RANDOM%nV))
	t=${V[i]} V[i]=${V[j]} V[j]=$t
	done

	# Pack into nB buckets B[], and compute bucket totals into Tb[]
	for ((i=0;i<nV;++i)) do
	v=V[i]

	# choose target bucket q
	t=Tb[q=0]
	for ((p=1;p<nB;++p)) do
	((Tb[p]<t)) && (( t=Tb[q=p] ))
	done

	# add to bucket
	((Tb[q]+=v))
	B[q]+="+$v"
	done

	else

	# Pack nV items from SortedA[] into nB buckets B[], and compute bucket totals into Tb[]
	(( g = RANDOM % (G-1) + 1 ))
	for ((i=0;i<nV;++i)) do
	v=V[i]

	# choose target bucket q
	(( q = RANDOM % ( nB * ( nV - i + 1 )g / nVg + 1 ) ))

	# add to bucket
	((Tb[q]+=v))
	B[q]+="+$v"

	# re-sort nB buckets Tb[] & B[]
	for (( p=q+1 ; p<nV && Tb[q] > Tb[p] ; ++p )) do :; done
	if (( p>q+1 )) ; then
	Tb=( "${Tb[@]:0:q}" "${Tb[@]:q+1:p-q-1}" "${Tb[@]:q:1}" "${Tb[@]:p}" )
	B=( "${B[@]:0:q}" "${B[@]:q+1:p-q-1}" "${B[@]:q:1}" "${B[@]:p}" )
	fi
	done
	fi


	# Compute population variance between bucket totals.
	# (Don't need squareroot for stddev, since they're only for comparison)
	for (( Var=-Mb*Mb, p=0 ; p<nB ; ++p )) do
	(( t=Tb[p], Var += t*t/nB ))
	done

	if ((Var >= 0 && Var < BestV)) ; then
	((BestV=Var))
	BestT=( "${Tb[@]}" )
	BestO=( "${B[@]}" )

	# StdDev = √Var, using Newton's square-root method
	for (( StdDev=3${#Var} ; StdDev2 > Var \|\| (StdDev+1)**2 < Var ; StdDev+=Var/StdDev, StdDev /= 2 )) do :;done

	printf 'Improved: mode=%s gamma=%s var=%s stddev=±%s µ/σ=%s\n' "$mode" "$g" "$Var" "$StdDev" "$((Mb/StdDev))"
	for ((p=0;p<nB;++p)) do
	printf '\t%s=%s\n' "${Tb[p]}" "${B[p]#'+'}"
	done
	fi
	done