Skip to content

Instantly share code, notes, and snippets.

@trevorsibanda
Created April 10, 2017 15:57
Show Gist options
  • Save trevorsibanda/2ce1115fcc235dd80500156a7fe52121 to your computer and use it in GitHub Desktop.
Save trevorsibanda/2ce1115fcc235dd80500156a7fe52121 to your computer and use it in GitHub Desktop.
// Simple algorithm to calculate the byte-level Shannon entropy of a file in Scala
package zw.trevor.disorder
/** Byte-level Shannon entropy statistics for a sequence of bytes.
  *
  * Every derived value is a `lazy val`: nothing is computed until it is first
  * read, and each value is computed at most once per instance.
  *
  * @param bytes the raw bytes to analyse; an empty sequence is allowed
  */
class Entropy(bytes: Seq[Byte]) {

  /** Number of distinct values a byte can take (the alphabet size). */
  val max_bytes: Int = 256

  /** Base-2 logarithm of `n`. */
  def log2(n: Double): Double = math.log(n) / math.log(2)

  /** Occurrence count of every byte value, indexed by its unsigned value (0 to 255). */
  lazy val token_freqs: Array[Int] = {
    val counts = new Array[Int](max_bytes)
    // JVM bytes are signed; masking with 0xff maps -128..127 onto 0..255.
    bytes.foreach { b =>
      counts(b & 0xff) += 1
    }
    counts
  }

  /** Number of distinct byte values that occur at least once in `bytes`. */
  lazy val num_tokens: Int = token_freqs.count(_ != 0)

  /** Relative frequency of every byte value, indexed like `token_freqs`.
    *
    * All entries are 0.0 for empty input (instead of NaN from dividing by zero).
    */
  lazy val token_probs: Array[Double] = {
    val total = bytes.length.toDouble
    if (total == 0.0) new Array[Double](max_bytes)
    else token_freqs.map(_ / total)
  }

  /** Shannon entropy in bits per byte: the sum of `-p * log2(p)` over every
    * byte value with a non-zero probability `p`. Ranges from 0.0 to `log2(max_bytes)`.
    */
  lazy val entropy: Double = {
    // Values that never occur are skipped: log2(0) is -Infinity and 0 * -Infinity is NaN.
    val weighted = token_probs.collect { case p if p > 0.0 => p * log2(p) }
    // Subtracting from 0.0 (rather than negating) avoids returning negative zero.
    0.0 - weighted.sum
  }

  /** Largest entropy reachable with `num_tokens` distinct values, i.e.
    * `log2(num_tokens)`; 0.0 when the input is empty.
    */
  lazy val max_entropy: Double =
    if (num_tokens == 0) 0.0 else log2(num_tokens)

  /** Ratio of `entropy` to `max_entropy`.
    *
    * Defined as 0.0 when `max_entropy` is zero (empty input or a single
    * distinct byte value), where the quotient would otherwise be NaN.
    */
  lazy val m_ratio: Double =
    if (max_entropy == 0.0) 0.0 else entropy / max_entropy
}
/** Command-line entry point.
  *
  * For every readable non-directory entry directly inside `path`, reads the
  * whole file into memory and prints one line:
  * the file path (spaces replaced by `_space_`), its entropy, and its length.
  */
object Entropy extends App {
  import java.nio.file._
  import java.io.File

  /** Directory whose entries are scanned. */
  val path = "/usr/bin"

  // File.listFiles returns null (not an empty array) when `path` is not a
  // directory or cannot be listed; treat that case as "nothing to scan".
  Option(new File(path).listFiles)
    .getOrElse(Array.empty[File])
    .filter(candidate => !candidate.isDirectory && candidate.canRead)
    .foreach { file =>
      val bytes = Files.readAllBytes(file.toPath)
      val dis = new Entropy(bytes)
      println(s"""${file.getPath.replaceAll(" ", "_space_")} ${dis.entropy} ${file.length}""")
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment