Skip to content

Instantly share code, notes, and snippets.

@dacr
Last active May 27, 2023 15:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dacr/facaa509c582efc65efab05c7f7013f9 to your computer and use it in GitHub Desktop.
Save dacr/facaa509c582efc65efab05c7f7013f9 to your computer and use it in GitHub Desktop.
Benford law experiments / published by https://github.com/dacr/code-examples-manager #3a44812b-bbbd-4382-9fdb-a50269bdbe98/d5cd903a3ca801c213079faedbb787175479692b
// summary : Benford law experiments
// keywords : scala, math, benfordlaw, benford, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 3a44812b-bbbd-4382-9fdb-a50269bdbe98
// created-on : 2020-12-06T08:41:25Z
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala "3.3.0"
//> using dep "org.scalatest::scalatest:3.2.16"
//> using dep "com.lihaoyi::requests:0.8.0"
//> using objectWrapper
// ---------------------
/*
Benford's law on Wikipedia : https://en.wikipedia.org/wiki/Benford%27s_law
*/
import org.scalatest._,flatspec._,matchers._
import scala.math._
/** Expected frequency of `digit` according to Benford's law.
  *
  * Evaluates `log10(1 + 1/digit)`.
  *
  * @param digit the digit whose expected frequency is wanted
  * @return the expected frequency for `digit`
  */
def benfordReferenceDistributionForDigit(digit:Int):Double = {
  val inverse = 1d / digit
  log10(1d + inverse)
}
/** Benford reference distribution for the digits 1 to 9.
  *
  * @return one `(digit, expectedFrequency)` pair per digit, in ascending digit order
  */
def benfordReferenceDistribution():Iterable[(Int,Double)] = {
  for (digit <- 1 to 9)
    yield (digit, benfordReferenceDistributionForDigit(digit))
}
/** Scores how far the leading-digit distribution of `input` is from Benford's law.
  *
  * The score is the mean absolute deviation between the observed frequency of
  * each digit 1..9 (as computed by `figureFrequencies` at its default position)
  * and the frequency given by `benfordReferenceDistribution`.
  * The lower is better, 0 means fully compliant.
  *
  * @param input the raw values to analyse
  * @return the mean absolute deviation over the nine reference digits; a digit
  *         that is never observed counts with an observed frequency of 0, so an
  *         input yielding no digit at all scores 1/9
  */
def benfordConfidence(input:Iterable[String]):Double = {
  val observed = figureFrequencies(input)
  val deviations =
    benfordReferenceDistribution().map { case (digit, expected) =>
      // digits absent from the input simply have an observed frequency of zero
      abs(observed.getOrElse(digit, 0d) - expected)
    }
  deviations.sum / deviations.size
}
/** Computes the relative frequency of the significant digit found at a given
  * position in each input string.
  *
  * For every string, non-digit characters are ignored and leading zeros are
  * skipped; the digit at index `pos` among the remaining digits is then
  * selected. Only leading zeros are skipped, so a zero can be reported for
  * `pos > 0` (for example "105" yields 0 at position 1). Strings that do not
  * have enough digits are left out of the statistics.
  *
  * @param input the raw values to analyse
  * @param pos   zero-based index of the significant digit to look at (0 = leading digit)
  * @return for each digit encountered, its count divided by the number of
  *         strings that provided a digit; empty when no string provided one
  */
def figureFrequencies(input:Iterable[String], pos:Int=0):Map[Int,Double] = {
  // Materialized once so that the input is traversed a single time.
  val digits: Vector[Int] =
    input
      .iterator
      .flatMap { text =>
        text
          .iterator
          .filter(_.isDigit)
          .map(_.asDigit) // numeric value of the character, also for non-ASCII digits
          .dropWhile(_ == 0) // leading zeros are not significant
          .drop(pos)
          .nextOption()
      }
      .toVector
  val digitsCount = digits.size.toDouble
  digits
    .groupMapReduce(identity)(_ => 1)(_ + _)
    .map { case (digit, count) => digit -> count / digitsCount }
}
/** ScalaTest suite exercising `figureFrequencies`.
  *
  * The first test checks the frequencies obtained from the strings "1" to "9".
  * The second one downloads a CSV file and only logs the frequencies computed
  * from its third column, without asserting anything on them.
  */
class FigureFrequenciesTest extends AnyFlatSpec with should.Matchers {
  override def suiteName: String = "FigureFrequenciesTest"

  "Benford law compute function" should "return the right frequencies" in {
    // each digit from 1 to 9 appears exactly once as a leading digit
    val samples = 1.to(9).map(_.toString)
    val f1 = figureFrequencies(samples)
    info("of course in that particular case, benford law is not verified")
    f1.values.foreach(freq => freq shouldBe 0.1d +- 0.02d)
  }

  "Benford law" should "" in {
    val csvLines =
      requests
        .get("https://www.data.gouv.fr/fr/datasets/r/554590ab-ae62-40ac-8353-ee75162c05ee")
        .lines()
        .drop(1) // first line == the CSV labels
    // keep only rows that split into exactly four ';'-separated fields
    val rows = csvLines.map(_.split(";", 4)).filter(_.length == 4)
    // third field — presumably the postal code, to be confirmed against the dataset
    val postalCodes = rows.map(fields => fields(2))
    val frequencies = figureFrequencies(postalCodes)
    info("unsatisfied on france town postal codes")
    frequencies
      .toList
      .sortBy(_._1)
      .foreach(entry => info(entry.toString))
  }
}
// Script entry point: runs the FigureFrequenciesTest suite through ScalaTest's command-line Runner.
// Per the ScalaTest Runner documentation, "-oDF" selects the standard-output reporter showing
// durations (D) and full stack traces (F), and "-s" gives the name of the suite class to run.
org.scalatest.tools.Runner.main(Array("-oDF", "-s", classOf[FigureFrequenciesTest].getName))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment