Skip to content

Instantly share code, notes, and snippets.

@rzykov
Created October 6, 2021 11:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rzykov/847f97d48e14da2b0e432af442af16e7 to your computer and use it in GitHub Desktop.
Save rzykov/847f97d48e14da2b0e432af442af16e7 to your computer and use it in GitHub Desktop.
DataAnalysisIntro6.scala
//CODE of MIC
import data.VarPairData
import mine.core.MineParameters
import analysis.Analysis
import analysis.results.BriefResult
import scala.util.Random
/**
 * Encodes a discrete (categorical) column as numbers, assigning the codes in
 * random order.
 *
 * Every distinct value in `col` receives a unique integer code between 1 and
 * the number of distinct values. Which value gets which code is decided by a
 * random shuffle, so separate calls generally yield different encodings.
 *
 * @param col the discrete values to encode
 * @return an array of the same length as `col` holding the code of each
 *         element, converted to `Double`
 */
def encode(col: Array[String]): Array[Double] = {
  val distinctValues = col.toSet
  // Random permutation of the codes 1..k, where k is the number of distinct values.
  val shuffledCodes = scala.util.Random.shuffle(1 to distinctValues.size)
  val codeOf = distinctValues.zip(shuffledCodes).toMap
  col.map(value => codeOf(value).toDouble)
}
/**
 * Calculates the MIC (maximal information coefficient) of two numeric samples.
 *
 * Both inputs are narrowed to `Float`, wrapped in a `VarPairData`, and handed
 * to `Analysis.getResult` asking for a `BriefResult`; the value returned is
 * that result's `getMIC`.
 *
 * @param x values of the first variable
 * @param y values of the second variable (presumably the same length as `x`
 *          is expected by the library — TODO confirm)
 * @return the MIC reported by the analysis
 */
def mic(x: Array[Double], y: Array[Double]) = {
  val xs = x.map(_.toFloat)
  val ys = y.map(_.toFloat)
  val pair = new VarPairData(xs, ys)
  // NOTE(review): the four settings are passed through exactly as written here;
  // check their meaning against the MineParameters constructor documentation.
  val settings = new MineParameters(0.6.toFloat, 15, 0, null)
  Analysis.getResult(classOf[BriefResult], pair, settings).getMIC
}
/**
 * Evaluates `mic(x, y)` repeatedly and returns the largest value observed.
 * Intended for discrete variables, where many iterations are run and the
 * maximum is taken.
 *
 * NOTE(review): `x` and `y` are the same on every iteration (callers encode
 * the discrete column once, before calling this function), so the repetitions
 * only differ if `mic` itself is non-deterministic — confirm whether
 * re-encoding on each iteration was intended.
 *
 * @param x values of the first variable
 * @param y values of the second variable
 * @param n number of `mic` evaluations to run; must be positive (default 100)
 * @return the maximum MIC over the `n` evaluations
 * @throws IllegalArgumentException if `n` is not positive
 */
def micMax(x: Array[Double], y: Array[Double], n: Int = 100) = {
  // Fail with a clear message instead of an "empty.max" error further down.
  require(n > 0, s"n must be positive, got $n")
  // Run exactly `n` evaluations, as requested by the caller.
  (1 to n).map(_ => mic(x, y)).max
}
// Now we're close to the final result, let's perform the calculation.
// Each section below pairs one attribute with the aov value, collects the
// pairs, and prints the MIC between that attribute and aov.
val aov = dataAov
  .filter(x => interestedBrowsers.contains(x.osFamily)) // keep only the OSes we want
  .filter(_.categoryId == 128) // keep only category 128

//osFamily
// `aovMic` stays a reassigned `var` so the top-level name remains available
// to any code that follows this section.
var aovMic = aov.map(x => (x.osFamily, x.aov)).collect()
println("osFamily MIC =" + micMax(encode(aovMic.map(_._1)), aovMic.map(_._2)))

//orderId
aovMic = aov.map(x => (x.orderId, x.aov)).collect()
println("orderId MIC =" + micMax(encode(aovMic.map(_._1)), aovMic.map(_._2)))

//cityId
aovMic = aov.map(x => (x.cityId, x.aov)).collect()
println("cityId MIC =" + micMax(encode(aovMic.map(_._1)), aovMic.map(_._2)))

//uaName
// NOTE(review): this attribute is also encoded, yet it uses a single `mic`
// evaluation instead of `micMax` like the ones above — confirm this is intended.
aovMic = aov.map(x => (x.uaName, x.aov)).collect()
println("uaName MIC =" + mic(encode(aovMic.map(_._1)), aovMic.map(_._2)))

//aov
// aov compared with itself (second tuple component on both sides).
println("aov MIC =" + micMax(aovMic.map(_._2), aovMic.map(_._2)))

//random
// Uniform random values in [0, 100) compared with aov, as a noise baseline.
println("random MIC =" + mic(aovMic.map(_ => scala.util.Random.nextDouble() * 100.0), aovMic.map(_._2)))
//OUTPUT
// osFamily MIC =0.06658
// orderId MIC =0.10074
// cityId MIC =0.07281
// aov MIC =0.99999
// uaName MIC =0.05297
// random MIC =0.10599
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment