Skip to content

Instantly share code, notes, and snippets.

@sudipto80
Created January 22, 2016 08:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sudipto80/5d91060e998ab383b274 to your computer and use it in GitHub Desktop.
Save sudipto80/5d91060e998ab383b274 to your computer and use it in GitHub Desktop.
IR Lib
namespace IRLib
module asymSimilarity =
let private getABCD (first :string list)(second : string list) =
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList
let firstMatches = all |> List.map (fun t -> first |> List.contains t )
let secondMatches = all |> List.map (fun t -> second |> List.contains t )
let zipped = List.zip firstMatches secondMatches
let A = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length
let B = zipped |> List.filter (fun t -> fst t = false && snd t = true) |> List.length
let C = zipped |> List.filter (fun t -> fst t = true && snd t = false) |> List.length
let D = zipped |> List.filter (fun t -> fst t = false && snd t = false) |> List.length
[|A ; B ; C ;D|] |> Array.map float
let SS1 (first :string list)(second : string list) =
let abcd = getABCD first second
let a = abcd.[0]
let b = abcd.[1]
let c = abcd.[2]
let d = abcd.[3]
a / ( a + 2. * b + 2. * c)
let SS2 (first :string list)(second : string list) =
let abcd = getABCD first second
let a = abcd.[0]
let b = abcd.[1]
let c = abcd.[2]
let d = abcd.[3]
(2. * a + 2. * d )/ (float first.Length + a + d)
let SS3 (first :string list)(second : string list) =
let abcd = getABCD first second
let a = abcd.[0]
let b = abcd.[1]
let c = abcd.[2]
let d = abcd.[3]
0.25 * ( a / (a + b) + a / (a + c) + d / (b + d) + d / (c + d))
let SS4 (first :string list)(second : string list) =
let abcd = getABCD first second
let a = abcd.[0]
let b = abcd.[1]
let c = abcd.[2]
let d = abcd.[3]
(a / (sqrt ((a + b) * (a + c))) ) *
(d / (sqrt ((b + d) * (c + d))) )
let tanimotoCoeff (first : string list) ( second : string list ) =
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList
let firstMatches = all |> List.map (fun t -> first |> List.contains t )
let secondMatches = all |> List.map (fun t -> second |> List.contains t )
let zipped = List.zip firstMatches secondMatches
let Nc = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length
let Na = firstMatches |> List.filter (fun t -> t = true) |> List.length
let Nb = secondMatches |> List.filter (fun t -> t = true) |> List.length
float Nc / float (Na + Nb - Nc)
//For calculating similarity between asymteric binary attributes
let jaccardCoeff (first : string list) ( second : string list ) =
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList
let firstMatches = all |> List.map (fun t -> first |> List.contains t )
let secondMatches = all |> List.map (fun t -> second |> List.contains t )
let zipped = List.zip firstMatches secondMatches
let M11 = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length
let M01 = zipped |> List.filter (fun t -> fst t = false && snd t = true) |> List.length
let M10 = zipped |> List.filter (fun t -> fst t = true && snd t = false) |> List.length
let M00 = zipped |> List.filter (fun t -> fst t = false && snd t = false) |> List.length
let J = float M11 / float (M01 + M10 + M11)
J//return the Jaccard coefficient
//Dice coefficient
let diceCoeff (first : string list) ( second : string list ) =
let J = jaccardCoeff first second
2. * J / (1. + J)
//refer http://www.geo.arizona.edu/Antevs/ecol438/simindex.html (Simple Matching)
let simpleMatchingCoeff (first : string list) ( second : string list )=
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList
let firstMatches = all |> List.map (fun t -> first |> List.contains t )
let secondMatches = all |> List.map (fun t -> second |> List.contains t )
let zipped = List.zip firstMatches secondMatches
let M11 = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length
let M01 = zipped |> List.filter (fun t -> fst t = false && snd t = true) |> List.length
let M10 = zipped |> List.filter (fun t -> fst t = true && snd t = false) |> List.length
let M00 = zipped |> List.filter (fun t -> fst t = false && snd t = false) |> List.length
let numerator = M11 + M00
let denominator = M10 + M01 - M11 + M00
float numerator / float denominator
module similarity =
let private prod (a,b) = float a * float b
let private sqr x = float x * float x
let toPdf (h:int list)=
h |> List.map (fun t -> float t / float h.Length )
let histToPdf (h:int list)=
let sum = h |> List.sum
h |> List.map (fun t -> float t / float sum)
let listToPdf (aList : int list)=
aList |> List.countBy (fun t -> t)
|> List.map snd
|> toPdf
let sorensen (p:int list)(q:int list) =
let zipped = List.zip (p |> toPdf ) (q|>toPdf)
let numerator = zipped |> List.sumBy (fun t -> float (fst t - snd t))
let denominator = zipped |> List.sumBy (fun t -> float (fst t + snd t))
numerator / denominator
//Euclidean distnace
let euclidean (p:int list)(q:int list) =
List.zip (p|>toPdf) (q |> toPdf)|> List.sumBy (fun t -> float (fst t - snd t) ** 2.)
let euclideanBy (p:int list)(q:int list) =
List.zip (p|>toPdf) (q |> toPdf)|> List.sumBy (fun t -> float (fst t - snd t) ** 2.)
//Cityblock distance
let cityBlock (p:int list)(q:int list) =
List.zip (p|>toPdf) (q |> toPdf)|> List.map (fun t -> float (fst t - snd t)) |> List.sum
//Chebyshev distance
let chebyshev(p:int list)(q:int list) =
List.zip (p|>toPdf) (q |> toPdf)|> List.map( fun t -> abs (fst t - snd t)) |> List.max
//Soergel Disance
let soergel (p:int list)(q:int list) =
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map(fun t -> abs( fst t - snd t)) |> List.sum
let denominator = zipped |> List.map (fun t -> max (fst t ) (snd t)) |> List.sum
float numerator / float denominator
let kulczynski_d (p:int list)(q:int list) =
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map(fun t -> abs( fst t - snd t) )|> List.sum
let denominator = zipped |> List.map (fun t -> min (fst t ) (snd t)) |> List.sum
float numerator / float denominator
//kulczynski_s is the reciprocal of kulczynski_d distance
let kulczynski_s (p:int list)(q:int list) =
1. / kulczynski_d p q
let canberra (p:int list)(q:int list) =
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map(fun t -> abs( fst t - snd t) )|> List.sum
let denominator = zipped |> List.map (fun t ->fst t + snd t) |> List.sum
float numerator / float denominator
//
let lorentzian (p:int list)(q:int list) =
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> Operators.log ( 1. + float( abs (fst t - snd t ))))
let intersection(p:int list ) (q: int list) =
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> min (fst t) (snd t)) |> List.sum
let waveHedges (p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map ( fun t -> 1. - float( min (fst t) (snd t))
/ float (max (fst t) (snd t)))
|> List.sum
//For color image retrieval
let czekanowski(p:int list)(q:int list) =
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = 2. * (zipped |> List.map (fun t -> min (fst t) (snd t))
|> List.sum
|> float)
let denominator = zipped |> List.map (fun t -> fst t + snd t) |> List.sum |> float
numerator / denominator
let gower(p:int list)(q:int list)=
//I love this. Free flowing fluid conversion
//rather than cramping abs and fst t - snd t in a single line
let numerator = List.zip (p|>toPdf) (q |> toPdf)|> List.map (fun t -> fst t - snd t)
|> List.map (fun z -> abs z)
|> List.map float
|> List.sum
|> float
let denominator = float p.Length
numerator / denominator
let motyka(p:int list)(q:int list)=
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map (fun t -> min (fst t) (snd t))
|> List.sum |> float
let denominator = zipped |> List.map (fun t -> fst t + snd t)
|> List.sum |> float
numerator / denominator
let jaccardPoint(p:int list)(q:int list)=
0.0
let dicePoint(p:int list)(q:int list)=
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map (fun t -> fst t * snd t)
|> List.sum
|> float
let denominator = (p |> List.map sqr |> List.sum |> float) +
(q |> List.map sqr |> List.sum |> float)
numerator / denominator
let ruzicka (p:int list) (q:int list) =
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map (fun t -> min (fst t) (snd t))
|> List.sum |> float
let denominator = zipped |> List.map (fun t -> max (fst t) (snd t))
|> List.sum |> float
numerator / denominator
let kumarHassebrook (p:int list) (q:int list) =
let sqr x = x * x
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let numerator = zipped |> List.map (fun t -> fst t * snd t )
|> List.sum |> float
let denominator = (p |> List.map sqr |> List.sum |> float ) +
(q |> List.map sqr |> List.sum |> float ) - numerator
numerator / denominator
let innerProduct(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.sumBy (fun t -> fst t * snd t)
let harmonicMean(p:int list)(q:int list)=
2. * (List.zip (p|>toPdf) (q |> toPdf)
|> List.sumBy (fun t -> ( fst t * snd t )/
(fst t + snd t)))
//document text
let cosineSimilarity(p:int list)(q:int list)=
let zipped = List.zip p q //(p|>toPdf) (q |> toPdf)
let prod (x,y) = float x * float y
let numerator = zipped |> List.map prod |> List.sum
let denominator = sqrt ( p|> List.map sqr |> List.sum |> float) *
sqrt ( q|> List.map sqr |> List.sum |> float)
numerator / denominator
let fidelity(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> float (fst t) * float (snd t))
|> List.map sqrt
|> List.sum
let bhattacharya(p:int list)(q:int list)=
-log (fidelity p q)
let squaredEuclidean (p:int list)(q:int list)=
let toSquare x = x * x
List.zip (p|>toPdf) (q |> toPdf)
|> List.sumBy (fun t-> (fst t - snd t) ** 2.0)
let hellinger(p:int list)(q:int list)=
let prod (a,b) = float a * float b
let product = List.zip (p|>toPdf) (q |> toPdf)
|> List.map prod
|> List.map sqrt
|> List.sum
let right = 1. - product
2. * right |> abs//taking this off will result in NaN
|> float
|> sqrt
//image color image retr - refer http://www.autom.teithe.gr/gr/drastiriotites/impro3Dpos/papers/ants_SCIA2007.pdf
let matusita(p:int list)(q:int list)=
let prod (a,b) = float a * float b
let value =2. - 2. *
( List.zip (p|>toPdf) (q |> toPdf)
|> List.map prod
|> List.map sqrt
|> List.sum)
value |> abs |> sqrt
let squarredChord(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> sqrt (fst t ) - sqrt (snd t))
|> List.sum
let pearsonsChi(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> (fst t - snd t ) ** 2.0 / snd t)
|> List.sum
let neymanChi(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> (fst t - snd t ) ** 2.0 / fst t)
|> List.sum
let squaredChi(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> (fst t - snd t ) ** 2.0 / (fst t + snd t))
|> List.sum
let probabilisticSymmetricChi(p:int list)(q:int list)=
2.0 * squaredChi p q
let divergence(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> (fst t - snd t) ** 2. / (fst t + snd t) ** 2.)
|> List.sum
let clark(p:int list)(q:int list)=
sqrt( List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> float( abs( fst t - snd t))
/ (float (fst t + snd t)))
|> List.map (fun t -> t * t)
|> List.sum )
let additiveSymmetricChi(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> (fst t - snd t ) ** 2. * (fst t + snd t) / (fst t * snd t))
|> List.sum
let kullbackLeibler(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> float( fst t) * log (float (fst t )/float (snd t)))
|> List.sum
let topose (p:int list)(q:int list) =
let sum ( a , b ) = a + b
//Higher order functions in action!
let firstByBoth ( a , b) = float a / float (sum (a , b))
let secondByBoth ( a , b)= float b / float (sum (a , b))
let zipped = List.zip (p|> List.map (fun t -> float t / float p.Length))
(q|> List.map (fun t -> float t / float p.Length))
zipped |> List.map ( fun t -> float (fst t) * firstByBoth t + float (snd t) * secondByBoth t)
|> List.sum
//color image
let jeffreys(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> float( fst t - snd t) * log (float (fst t )/float (snd t)))
|> List.sum
let k_Divergence(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> fst t * log (2.0 * fst t) / (fst t + snd t))
|> List.sum
let jensenShanon(p:int list)(q:int list)=
0.5 * topose p q
let jensenDifference (p:int list) (q:int list) =
let zipped = List.zip (p|>toPdf) (q |> toPdf)
let left = zipped |> List.map (fun t -> fst t * log (fst t) + snd t * log (snd t))
|> List.map (fun t -> t / 2.)
let right = zipped |> List.map (fun t -> ((fst t + snd t) * 0.5 ) * log (0.5*(fst t + snd t)))
List.zip left right |> List.map (fun t -> fst t - snd t) |> List.sum
let taneja(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map ( fun t ->
float(fst t + snd t )/2.0
* log ( float ( fst t + snd t)/ 2. * sqrt (fst t * snd t)))
|> List.sum
let kumarJohnson(p:int list)(q:int list)=
List.zip (p|>toPdf) (q |> toPdf)
|> List.map (fun t -> (fst t ** 2. - snd t ** 2.) ** 2.
/ (2. * (fst t * snd t) ** 1.5))
|> List.sum
let jaccard (first : string list) (second:string list) =
let setOfFirst = first |> Set.ofList
let setOfSecond = second |> Set.ofList
match first.Length + second.Length with
| 0 -> 0.0 //If the union is zero there can't be any match
//But in set theory two empty sets are considered equal
//So we can return 0.0 to
| _ -> float (Set.intersect setOfFirst setOfSecond).Count /
float (Set.union setOfFirst setOfSecond).Count
//Find jaccard index for all the elements
let jaccardForAll (matchThis : string list) (aginst : (string list) list) =
aginst |> List.map ( fun t -> (t,jaccard matchThis t))
|> List.sortByDescending (fun t -> snd t)
let jaccardDistance (first : string list) (second : string list) =
1. - jaccard first second
//A generic tversky index function
let tverskyIndexG (first :'a list)(second : 'a list) =
let X = Set.ofList first
let Y = Set.ofList second
let alpha = 0.34
let beta = 0.42
let I = Set.intersect X Y |> Set.count
let Diff = X - Y |> Set.count
let Diff2 = Y - X |> Set.count
float I / float (I + Diff + Diff2)
//Calculates tversky index for couple of lists
let tverskyIndex (first : string list) ( second : string list ) =
let X = Set.ofList first
let Y = Set.ofList second
let alpha = 0.34
let beta = 0.42
let I = Set.intersect X Y |> Set.count
let Diff = X - Y |> Set.count
let Diff2 = Y - X |> Set.count
float I / float (I + Diff + Diff2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment