Created
January 25, 2019 20:33
-
-
Save rayfix/fef76f993c63ee8961a9b31c606b48a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A 2x2 contingency table holding the support counts of two items, B and C,
// in comparison with one another.
struct ContingencyTable {
    // Observed cell counts. A leading underscore before a letter means "not".
    var bc: Int    // B and C
    var b_c: Int   // B and not C
    var _bc: Int   // not B and C
    var _b_c: Int  // not B and not C

    // Marginal counts: each is the sum of the two cells that share the item.
    var b: Int { bc + b_c }
    var _b: Int { _bc + _b_c }
    var c: Int { bc + _bc }
    var _c: Int { b_c + _b_c }

    // Total number of observations. The C marginals and the B marginals
    // must add up to the same number.
    var total: Int {
        let sumOverC = c + _c
        precondition(sumOverC == b + _b)
        return sumOverC
    }
}
// Sample table used by the examples that follow.
let table = ContingencyTable(
    bc: 400,
    b_c: 200,
    _bc: 350,
    _b_c: 50
)
/////////////////////////////////////////////////////////////////////////////
/// LIFT
/////////////////////////////////////////////////////////////////////////////
// Lift is a statistical measure of how two items A and B relate:
// they are independent if the value is 1, positively correlated if it is
// greater than 1, and negatively correlated if it is less than 1.
struct Lift {
    var value: Double

    // True when the value is within 1e-10 of one.
    var isIndependent: Bool {
        let distanceFromOne = abs(value - 1)
        return distanceFromOne < 1e-10
    }

    var isPositivelyCorrelated: Bool { 1 < value }

    var isNegativelyCorrelated: Bool { 1 > value }

    // Relative-support based computation: s(AB) / (s(A) * s(B)).
    static func compute(sAB: Double, sA: Double, sB: Double) -> Lift {
        let productOfSupports = sA * sB
        return Lift(value: sAB / productOfSupports)
    }

    // Absolute-support based computation: each count is divided by `total`
    // to obtain a relative support, then the relative-support overload is used.
    static func compute(ab: Int, a: Int, b: Int, total: Int) -> Lift {
        let n = Double(total)
        return compute(sAB: Double(ab) / n, sA: Double(a) / n, sB: Double(b) / n)
    }
}
// Expose lift as a property of the contingency table.
extension ContingencyTable {
    // lift(B, C), computed from the table's absolute counts.
    var lift: Lift {
        let jointCount = bc
        let countB = b
        let countC = c
        return Lift.compute(ab: jointCount, a: countB, b: countC, total: total)
    }
}
// Kick the tires.
// lift(B, C) straight from the table:
table.lift.value // 0.89

// The same measure can be computed outside the table, as the slides do.
// lift(B, C)
let liftBC = Lift.compute(
    ab: table.bc,
    a: table.b,
    b: table.c,
    total: table.total
)
liftBC.value // 0.89
liftBC.isNegativelyCorrelated // true
liftBC.isIndependent // false
liftBC.isPositivelyCorrelated // false

// lift(B, not C)
let liftBNotC = Lift.compute(
    ab: table.b_c,
    a: table.b,
    b: table._c,
    total: table.total
)
liftBNotC.value // 1.33
liftBNotC.isPositivelyCorrelated // true
/////////////////////////////////////////////////////////////////////////////
// ## χ²
/////////////////////////////////////////////////////////////////////////////
// Wrapper around a χ² statistic with convenience predicates.
struct ChiSquared {
    var value: Double

    // True when the statistic is very close to zero (magnitude below
    // Double.ulpOfOne).
    var isIndependent: Bool {
        let distanceFromZero = value.magnitude
        return distanceFromZero < Double.ulpOfOne
    }

    // True for any strictly positive statistic. χ² alone cannot tell whether
    // the correlation is positive or negative.
    var isCorrelated: Bool { 0 < value }
}
// χ² support: expected cell counts and the statistic itself.
extension ContingencyTable {
    // One cell's contribution: (observed - expected)² / expected.
    private func term(observed: Int, expected: Double) -> Double {
        let deviation = Double(observed) - expected
        return (deviation * deviation) / expected
    }

    // Expected count for a cell, from its row marginal and column marginal:
    // row * (column / total).
    private func expectedCount(row: Int, column: Int) -> Double {
        let columnShare = Double(column) / Double(total)
        return Double(row) * columnShare
    }

    // Expected count for each of the four cells.
    var expectedBC: Double { expectedCount(row: b, column: c) }
    var expectedB_C: Double { expectedCount(row: b, column: _c) }
    var expected_BC: Double { expectedCount(row: _b, column: c) }
    var expected_B_C: Double { expectedCount(row: _b, column: _c) }

    // The χ² statistic: the sum of the four per-cell terms.
    var χ²: ChiSquared {
        let both = term(observed: bc, expected: expectedBC)
        let onlyC = term(observed: _bc, expected: expected_BC)
        let onlyB = term(observed: b_c, expected: expectedB_C)
        let neither = term(observed: _b_c, expected: expected_B_C)
        return ChiSquared(value: both + onlyC + onlyB + neither)
    }
}
table.χ².value // ≈ 55.56
/////////////////////////////////////////////////////////////////////////////
// ## Other measures: null-invariant (note: these can be computed from absolute support counts)
/////////////////////////////////////////////////////////////////////////////
// Further interestingness measures, all computed from absolute support counts.
// Every value is a Double, so a zero denominator yields NaN or infinity
// rather than trapping.
extension ContingencyTable {
    // All-confidence: sup(BC) / max(sup(B), sup(C)).
    var allConf: Double {
        return Double(bc) / Double(max(b, c))
    }

    // Jaccard coefficient: sup(BC) / (sup(B) + sup(C) - sup(BC)).
    var jaccard: Double {
        return Double(bc) / (Double(b) + Double(c) - Double(bc))
    }

    // Cosine: sup(BC) / sqrt(sup(B) * sup(C)).
    var cosine: Double {
        return Double(bc) / (Double(b) * Double(c)).squareRoot()
    }

    // Kulczynski: the average of the two confidences sup(BC)/sup(B) and
    // sup(BC)/sup(C).
    var kulczynski: Double {
        return 0.5 * (Double(bc) / Double(b) + Double(bc) / Double(c))
    }

    // Max-confidence: the larger of the two confidences sup(BC)/sup(B) and
    // sup(BC)/sup(C). The joint count is the numerator, matching `kulczynski`;
    // putting it in the denominator would invert the measure.
    var maxConf: Double {
        return max(Double(bc) / Double(b), Double(bc) / Double(c))
    }

    // Imbalance ratio: |sup(B) - sup(C)| / (sup(B) + sup(C) - sup(BC)).
    var imbalanceRatio: Double {
        return abs(Double(b) - Double(c)) / (Double(b) + Double(c) - Double(bc))
    }

    // Prints the four cell counts followed by every measure for this table.
    // - Parameters:
    //   - bLabel: label used for item B in the cell-count line.
    //   - cLabel: label used for item C in the cell-count line.
    func printAll(bLabel: String = "b", cLabel: String = "c") {
        print(String(repeating: "=", count: 60))
        print("\(bLabel)\(cLabel)", bc,
              "¬\(bLabel)\(cLabel)", _bc,
              "\(bLabel)¬\(cLabel)", b_c,
              "¬\(bLabel)¬\(cLabel)", _b_c)
        // Print the numeric statistic, like the Lift line below, rather than
        // the wrapper struct's description.
        print("χ²", χ².value)
        print("Lift", lift.value)
        print(String(repeating: "-", count: 60))
        print("allConf", allConf)
        print("jaccard", jaccard)
        print("cosine", cosine)
        print("kulczynski", kulczynski)
        print("maxConf", maxConf)
        print("imbalance", imbalanceRatio)
    }
}
// Milk and coffee examples: b is milk, c is coffee.
print("milk and coffee examples")

let milkAndCoffee = [
    ContingencyTable(bc: 10_000, b_c: 1_000, _bc: 1_000, _b_c: 100_000),
    ContingencyTable(bc: 10_000, b_c: 1_000, _bc: 1_000, _b_c: 100),
    ContingencyTable(bc: 100, b_c: 1_000, _bc: 1_000, _b_c: 100_000),
    ContingencyTable(bc: 1_000, b_c: 1_000, _bc: 1_000, _b_c: 100_000),
    ContingencyTable(bc: 1_000, b_c: 10_000, _bc: 100, _b_c: 100_000),
    ContingencyTable(bc: 1_000, b_c: 100_000, _bc: 10, _b_c: 100_000),
]

// Print every measure for each milk/coffee table in turn.
for scenario in milkAndCoffee {
    scenario.printAll(bLabel: "m", cLabel: "c")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment