Skip to content

Instantly share code, notes, and snippets.

@cneill
Last active August 23, 2020 08:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cneill/8869c9d4a56440ea4a0bdd817abbe73b to your computer and use it in GitHub Desktop.
Save cneill/8869c9d4a56440ea4a0bdd817abbe73b to your computer and use it in GitHub Desktop.
Analyze a set of numbers for "Benfordness"
package main
import (
"fmt"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"strings"
)
/*
As an example dataset, you can collect unconfirmed bitcoin transactions (quoted in USD) with the following shell command:
cat btc_transactions.txt <(curl https://www.blockchain.com/btc/unconfirmed-transactions | grep -oE '>\$[0-9\.,]+<' | tr -d ',$<>' | grep -vE '^0') | sort | uniq > btc_transactions2.txt && mv btc_transactions2.txt btc_transactions.txt && cat btc_transactions.txt | wc -l
And if you want some random numbers, courtesy of Python:
echo -e "import random\n\nfor i in range(0, 500000):\n\tprint(random.random() * 100000000)" | python > random.txt
NOTE: the bitcoin command above may also pick up other dollar amounts from the page, not just transaction values; this has not been verified.
*/
// errh is the program's catch-all error handler: a nil err is ignored, and
// any other err is logged with log.Fatalf, which terminates the process.
func errh(err error) {
	if err == nil {
		return
	}
	log.Fatalf("%v", err)
}
// getNumbers parses contents as newline-separated decimal numbers and returns
// the usable values in input order.
//
// Each line is trimmed of surrounding whitespace (so CRLF input works) and
// blank lines are ignored. Lines that strconv.ParseFloat rejects are reported
// on stdout and skipped. Zero, NaN and ±Inf are dropped silently:
// ParseFloat accepts spellings such as "NaN" and "Inf" without error, but
// those values have no leading digit to classify.
//
// The returned slice is never nil.
func getNumbers(contents []byte) []float64 {
	results := []float64{}
	for _, line := range strings.Split(string(contents), "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		parsed, err := strconv.ParseFloat(line, 64)
		if err != nil {
			fmt.Printf("Failed to parse %s: %v\n", line, err)
			continue
		}
		// No leading significant digit exists for these values.
		if parsed == 0 || math.IsNaN(parsed) || math.IsInf(parsed, 0) {
			continue
		}
		results = append(results, parsed)
	}
	return results
}
// getDistribution counts how many of numbers start with each leading
// significant digit. The result always has nine entries: index 0 holds the
// count for digit 1 and index 8 the count for digit 9.
//
// The sign and any leading zeros are ignored, so -42 counts under 4 and
// 0.0071 counts under 7. Values that have no leading digit (zero, NaN, ±Inf)
// are reported on stdout and left out of the tally.
func getDistribution(numbers []float64) []int64 {
	buckets := make([]int64, 9)
	for _, number := range numbers {
		// Shortest 'e' notation puts the leading significant digit in the
		// first byte. "%f" is not usable here: it emits the sign and leading
		// zeros, and rounds to six decimals (9.9999999 prints as 10.000000).
		repr := strconv.FormatFloat(math.Abs(number), 'e', -1, 64)
		lead := repr[0]
		// Zero formats as "0e+00", NaN as "NaN" and Inf as "+Inf"; none of
		// them starts with a digit in 1..9.
		if lead < '1' || lead > '9' {
			fmt.Printf("Invalid number: %f\n", number)
			continue
		}
		buckets[lead-'1']++
	}
	return buckets
}
// getPercentageDistribution converts per-digit counts into fractions of the
// total count (1.0 means every number fell in that bucket; the values are not
// multiplied by 100).
//
// The result has one entry per input bucket and never fewer than nine. When
// the counts sum to zero every share is 0, rather than the NaN that dividing
// 0 by 0 would produce.
func getPercentageDistribution(distribution []int64) []float64 {
	size := 9
	if len(distribution) > size {
		// Grow instead of panicking on an out-of-range index below.
		size = len(distribution)
	}
	buckets := make([]float64, size)

	var totalNum int64
	for _, num := range distribution {
		totalNum += num
	}
	if totalNum == 0 {
		return buckets
	}
	for i, num := range distribution {
		buckets[i] = float64(num) / float64(totalNum)
	}
	return buckets
}
// compareNewcomb returns, for each leading digit d (index d-1), the observed
// share in percentages minus the share log10((d+1)/d) expected under
// "Benford's Law". The result has nine entries; entries past
// len(percentages) stay 0.
//
// Simon Newcomb was the original discoverer of the law, in his paper "Note on
// the Frequency of Use of the Different Digits in Natural Numbers":
// https://www.jstor.org/stable/2369148?seq=1#metadata_info_tab_contents
//
// From the paper, Newcomb's probabilities of each leading digit:
//
//	1: 0.3010
//	2: 0.1761
//	3: 0.1249
//	4: 0.0969
//	5: 0.0792
//	6: 0.0669
//	7: 0.0580
//	8: 0.0512
//	9: 0.0458
func compareNewcomb(percentages []float64) []float64 {
	deviations := make([]float64, 9)
	for idx, observed := range percentages {
		digit := float64(idx + 1)
		deviations[idx] = observed - math.Log10((digit+1)/digit)
	}
	return deviations
}
// compareRandom returns each leading-digit share in percentages minus
// 0.1111111111111, the share every one of the nine digits would get if all
// were equally likely. The result has nine entries; entries past
// len(percentages) stay 0.
func compareRandom(percentages []float64) []float64 {
	const uniformShare = 0.1111111111111

	deviations := make([]float64, 9)
	for digit, share := range percentages {
		deviations[digit] = share - uniformShare
	}
	return deviations
}
// isRandom reports whether percentages looks uniformly distributed: every
// deviation returned by compareRandom must lie within 0.001 of zero, in
// either direction. Checking only positive deviations would let an
// under-represented digit go unnoticed.
func isRandom(percentages []float64) bool {
	const tolerance = 0.001
	for _, deviation := range compareRandom(percentages) {
		// Written as a negated "<=" so that a NaN deviation, for which every
		// comparison is false, counts as not random.
		if !(math.Abs(deviation) <= tolerance) {
			return false
		}
	}
	return true
}
// main reads the file named by the first command-line argument, parses it
// into numbers, and prints in order: the leading-digit counts, the per-digit
// shares, the deviation from Newcomb's expected shares, the deviation from a
// uniform split, and the isRandom verdict. It exits through log.Fatalf when
// no file name is given or the file cannot be read.
func main() {
	if len(os.Args) < 2 {
		log.Fatalf("Must supply a file name to parse")
	}

	raw, err := ioutil.ReadFile(os.Args[1])
	errh(err)

	counts := getDistribution(getNumbers(raw))
	fmt.Printf("Distribution: %#v\n", counts)

	shares := getPercentageDistribution(counts)
	fmt.Printf("Percentages: %#v\n", shares)

	fmt.Printf("Comparison to Newcomb: %#v\n", compareNewcomb(shares))
	fmt.Printf("Comparison to random: %#v\n", compareRandom(shares))
	fmt.Printf("Is distribution random? %t\n", isRandom(shares))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment