Skip to content

Instantly share code, notes, and snippets.

@rayfix
Created January 25, 2019 20:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rayfix/fef76f993c63ee8961a9b31c606b48a7 to your computer and use it in GitHub Desktop.
Save rayfix/fef76f993c63ee8961a9b31c606b48a7 to your computer and use it in GitHub Desktop.
// A 2×2 contingency table holding the absolute support counts of two
// items, B and C, in comparison with one another.
struct ContingencyTable {
    /// Observations containing both B and C.
    var bc: Int
    /// Observations containing B but not C.
    var b_c: Int
    /// Observations containing C but not B.
    var _bc: Int
    /// Observations containing neither B nor C.
    var _b_c: Int

    /// Marginal count of B (first row: B with C plus B without C).
    var b: Int {
        get { return bc + b_c }
    }

    /// Marginal count of "not B" (second row).
    var _b: Int {
        get { return _bc + _b_c }
    }

    /// Marginal count of C (first column: C with B plus C without B).
    var c: Int {
        get { return bc + _bc }
    }

    /// Marginal count of "not C" (second column).
    var _c: Int {
        get { return b_c + _b_c }
    }

    /// Grand total of all four cells.
    ///
    /// The column marginals and the row marginals must add up to the same
    /// number; this is checked as a sanity precondition before returning.
    var total: Int {
        let columnSum = c + _c
        precondition(columnSum == b + _b)
        return columnSum
    }
}
// Sample table of B/C support counts (1000 observations in total).
let table = ContingencyTable(
    bc: 400,
    b_c: 200,
    _bc: 350,
    _b_c: 50
)
/////////////////////////////////////////////////////////////////////////////
/// LIFT
/////////////////////////////////////////////////////////////////////////////
// Lift is a statistical measure of how two items A and B co-occur:
// they are independent if the value is 1, positively correlated if it is
// greater than 1, and negatively correlated if it is less than 1.
struct Lift {
    /// The lift ratio, s(A and B) / (s(A) * s(B)).
    var value: Double

    /// Values closer to 1 than this are treated as "exactly 1", to absorb
    /// floating-point rounding in the ratio.
    private static let tolerance = 1e-10

    /// True when the lift is (very close to) one.
    var isIndependent: Bool {
        return abs(value - 1) < Lift.tolerance
    }

    /// True when the lift is above one by more than the tolerance.
    ///
    /// Excludes the independence band so that the three classifications
    /// are mutually exclusive: a value such as 1 + 1e-12 is reported as
    /// independent only, not as independent *and* positively correlated.
    var isPositivelyCorrelated: Bool {
        return value > 1 && !isIndependent
    }

    /// True when the lift is below one by more than the tolerance.
    /// Mutually exclusive with `isIndependent` and `isPositivelyCorrelated`.
    var isNegativelyCorrelated: Bool {
        return value < 1 && !isIndependent
    }

    /// Relative-support based computation.
    ///
    /// - Parameters:
    ///   - sAB: Relative support of A and B together.
    ///   - sA: Relative support of A.
    ///   - sB: Relative support of B.
    /// - Returns: The lift `sAB / (sA * sB)`. No guard is applied: if
    ///   `sA * sB` is zero the result is infinite or NaN, and then none of
    ///   the three classification properties is meaningful.
    static func compute(sAB: Double, sA: Double, sB: Double) -> Lift {
        return Lift(value: sAB/(sA*sB))
    }

    /// Absolute-support based computation.
    ///
    /// Converts each count to a relative support by dividing by `total`
    /// and delegates to `compute(sAB:sA:sB:)`.
    ///
    /// - Parameters:
    ///   - ab: Count of observations containing both A and B.
    ///   - a: Count of observations containing A.
    ///   - b: Count of observations containing B.
    ///   - total: Total number of observations.
    /// - Returns: The lift of A and B. A `total` of zero yields NaN.
    static func compute(ab: Int, a: Int, b: Int, total: Int) -> Lift {
        let total = Double(total)
        let sAB = Double(ab)/total
        let sA = Double(a)/total
        let sB = Double(b)/total
        return Lift.compute(sAB: sAB, sA: sA, sB: sB)
    }
}
// Expose lift(B, C) as a computed property on the contingency table.
extension ContingencyTable {
    /// Lift of B and C, computed from this table's absolute counts:
    /// the joint count `bc`, the marginals `b` and `c`, and `total`.
    var lift: Lift {
        let jointCount = bc
        let countB = b
        let countC = c
        return Lift.compute(ab: jointCount, a: countB, b: countC, total: total)
    }
}
// Kick the tires (playground-style: each bare expression shows its result).
// Use the table to compute lift(B, C) via the computed property.
table.lift.value // ≈ 0.89 (400·1000 / (600·750))
// We can also compute outside the table if we want; the slides do this.
// lift(B, C)
let liftBC = Lift.compute(ab: table.bc, a: table.b, b: table.c, total: table.total)
liftBC.value // ≈ 0.89
liftBC.isNegativelyCorrelated // true
liftBC.isIndependent // false
liftBC.isPositivelyCorrelated // false
// lift(B, ¬C): uses the "B and not C" cell with the B and ¬C marginals.
let liftBNotC = Lift.compute(ab: table.b_c, a: table.b, b: table._c, total: table.total)
liftBNotC.value // ≈ 1.33 (200·1000 / (600·250))
liftBNotC.isPositivelyCorrelated // true
/////////////////////////////////////////////////////////////////////////////
// ## χ²
/////////////////////////////////////////////////////////////////////////////
/// A χ² (chi-squared) statistic together with a coarse interpretation.
struct ChiSquared {
    /// The χ² statistic itself.
    var value: Double

    /// True when the statistic is very close to zero, i.e. its magnitude
    /// is below `Double.ulpOfOne`.
    var isIndependent: Bool {
        return abs(value) < Double.ulpOfOne
    }

    /// True when the statistic is positive and outside the near-zero band
    /// used by `isIndependent`.
    ///
    /// The two properties are mutually exclusive: a tiny positive value
    /// such as 1e-20 is reported as independent, not as both independent
    /// and correlated. χ² alone cannot tell positive from negative
    /// correlation.
    var isCorrelated: Bool {
        return value > 0 && !isIndependent
    }
}
extension ContingencyTable {
    /// One cell's contribution to χ²: (observed − expected)² / expected.
    private func term(observed: Int, expected: Double) -> Double {
        let deviation = Double(observed) - expected
        return (deviation * deviation) / expected
    }

    /// Expected count for a cell, computed as the row marginal times the
    /// column marginal's share of the grand total.
    private func expectedCount(row: Int, column: Int) -> Double {
        let columnShare = Double(column) / Double(total)
        return Double(row) * columnShare
    }

    /// Expected count of "B and C".
    var expectedBC: Double {
        return expectedCount(row: b, column: c)
    }

    /// Expected count of "B and not C".
    var expectedB_C: Double {
        return expectedCount(row: b, column: _c)
    }

    /// Expected count of "not B and C".
    var expected_BC: Double {
        return expectedCount(row: _b, column: c)
    }

    /// Expected count of "not B and not C".
    var expected_B_C: Double {
        return expectedCount(row: _b, column: _c)
    }

    /// The χ² statistic of the table: the sum of the four per-cell terms.
    var χ²: ChiSquared {
        let both = term(observed: bc, expected: expectedBC)
        let onlyC = term(observed: _bc, expected: expected_BC)
        let onlyB = term(observed: b_c, expected: expectedB_C)
        let neither = term(observed: _b_c, expected: expected_B_C)
        // Summed in the same cell order as the terms are computed.
        return ChiSquared(value: both + onlyC + onlyB + neither)
    }
}
// χ² of the sample table (counts 400 / 200 / 350 / 50): ≈ 55.56.
table.χ².value
/////////////////////////////////////////////////////////////////////////////
// ## Other measures: NULL Invariant (Note we can use absolute support to compute)
/////////////////////////////////////////////////////////////////////////////
extension ContingencyTable {
    // All measures below use absolute counts and never read the
    // "neither B nor C" cell. None of them guards against a zero
    // denominator; a zero marginal yields NaN or infinity.

    /// All-confidence: sup(BC) / max(sup(B), sup(C)).
    var allConf: Double {
        return Double(bc) / Double(max(b,c))
    }

    /// Jaccard coefficient: sup(BC) / (sup(B) + sup(C) − sup(BC)).
    var jaccard: Double {
        return Double(bc) / (Double(b)+Double(c)-Double(bc))
    }

    /// Cosine measure: sup(BC) / sqrt(sup(B) · sup(C)).
    var cosine: Double {
        return Double(bc) / (Double(b)*Double(c)).squareRoot()
    }

    /// Kulczynski measure: the average of the two rule confidences,
    /// ½ · (sup(BC)/sup(B) + sup(BC)/sup(C)).
    var kulczynski: Double {
        return 0.5 * ( Double(bc)/Double(b) + Double(bc)/Double(c) )
    }

    /// Max-confidence: the larger of the two rule confidences,
    /// max(sup(BC)/sup(B), sup(BC)/sup(C)).
    ///
    /// The joint count is the numerator, so the result lies in 0...1
    /// for a valid table; dividing the marginals by the joint count
    /// instead would produce values above 1.
    var maxConf: Double {
        return max(Double(bc)/Double(b), Double(bc)/Double(c))
    }

    /// Imbalance ratio: |sup(B) − sup(C)| / (sup(B) + sup(C) − sup(BC)).
    var imbalanceRatio: Double {
        return abs(Double(b)-Double(c)) / (Double(b)+Double(c)-Double(bc))
    }

    /// Prints the four cell counts followed by every measure of the table
    /// to standard output.
    ///
    /// - Parameters:
    ///   - bLabel: Label used for item B in the cell headings.
    ///   - cLabel: Label used for item C in the cell headings.
    func printAll(bLabel: String = "b", cLabel: String = "c") {
        print(String(repeating: "=", count: 60))
        print("\(bLabel)\(cLabel)", bc,
              "¬\(bLabel)\(cLabel)",_bc,
              "\(bLabel)¬\(cLabel)", b_c,
              "¬\(bLabel)¬\(cLabel)", _b_c)
        print("χ²", χ²)
        print("Lift", lift.value)
        print(String(repeating: "-", count: 60))
        print("allConf", allConf)
        print("jaccard", jaccard)
        print("cosine", cosine)
        print("kulczynski", kulczynski)
        print("maxConf", maxConf)
        print("imbalance", imbalanceRatio)
    }
}
// Milk-and-coffee examples: b is milk, c is coffee.
print("milk and coffee examples")

let milkAndCoffee = [
    ContingencyTable(bc: 10_000, b_c: 1_000, _bc: 1_000, _b_c: 100_000),
    ContingencyTable(bc: 10_000, b_c: 1_000, _bc: 1_000, _b_c: 100),
    ContingencyTable(bc: 100, b_c: 1_000, _bc: 1_000, _b_c: 100_000),
    ContingencyTable(bc: 1_000, b_c: 1_000, _bc: 1_000, _b_c: 100_000),
    ContingencyTable(bc: 1_000, b_c: 10_000, _bc: 100, _b_c: 100_000),
    ContingencyTable(bc: 1_000, b_c: 100_000, _bc: 10, _b_c: 100_000)
]

// Print every measure for each example, labelled m (milk) and c (coffee).
for example in milkAndCoffee {
    example.printAll(bLabel: "m", cLabel: "c")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment