Created
January 22, 2016 08:18
-
-
Save sudipto80/5d91060e998ab383b274 to your computer and use it in GitHub Desktop.
IR Lib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace IRLib | |
module asymSimilarity = | |
let private getABCD (first :string list)(second : string list) = | |
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList | |
let firstMatches = all |> List.map (fun t -> first |> List.contains t ) | |
let secondMatches = all |> List.map (fun t -> second |> List.contains t ) | |
let zipped = List.zip firstMatches secondMatches | |
let A = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length | |
let B = zipped |> List.filter (fun t -> fst t = false && snd t = true) |> List.length | |
let C = zipped |> List.filter (fun t -> fst t = true && snd t = false) |> List.length | |
let D = zipped |> List.filter (fun t -> fst t = false && snd t = false) |> List.length | |
[|A ; B ; C ;D|] |> Array.map float | |
let SS1 (first :string list)(second : string list) = | |
let abcd = getABCD first second | |
let a = abcd.[0] | |
let b = abcd.[1] | |
let c = abcd.[2] | |
let d = abcd.[3] | |
a / ( a + 2. * b + 2. * c) | |
let SS2 (first :string list)(second : string list) = | |
let abcd = getABCD first second | |
let a = abcd.[0] | |
let b = abcd.[1] | |
let c = abcd.[2] | |
let d = abcd.[3] | |
(2. * a + 2. * d )/ (float first.Length + a + d) | |
let SS3 (first :string list)(second : string list) = | |
let abcd = getABCD first second | |
let a = abcd.[0] | |
let b = abcd.[1] | |
let c = abcd.[2] | |
let d = abcd.[3] | |
0.25 * ( a / (a + b) + a / (a + c) + d / (b + d) + d / (c + d)) | |
let SS4 (first :string list)(second : string list) = | |
let abcd = getABCD first second | |
let a = abcd.[0] | |
let b = abcd.[1] | |
let c = abcd.[2] | |
let d = abcd.[3] | |
(a / (sqrt ((a + b) * (a + c))) ) * | |
(d / (sqrt ((b + d) * (c + d))) ) | |
let tanimotoCoeff (first : string list) ( second : string list ) = | |
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList | |
let firstMatches = all |> List.map (fun t -> first |> List.contains t ) | |
let secondMatches = all |> List.map (fun t -> second |> List.contains t ) | |
let zipped = List.zip firstMatches secondMatches | |
let Nc = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length | |
let Na = firstMatches |> List.filter (fun t -> t = true) |> List.length | |
let Nb = secondMatches |> List.filter (fun t -> t = true) |> List.length | |
float Nc / float (Na + Nb - Nc) | |
//For calculating similarity between asymteric binary attributes | |
let jaccardCoeff (first : string list) ( second : string list ) = | |
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList | |
let firstMatches = all |> List.map (fun t -> first |> List.contains t ) | |
let secondMatches = all |> List.map (fun t -> second |> List.contains t ) | |
let zipped = List.zip firstMatches secondMatches | |
let M11 = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length | |
let M01 = zipped |> List.filter (fun t -> fst t = false && snd t = true) |> List.length | |
let M10 = zipped |> List.filter (fun t -> fst t = true && snd t = false) |> List.length | |
let M00 = zipped |> List.filter (fun t -> fst t = false && snd t = false) |> List.length | |
let J = float M11 / float (M01 + M10 + M11) | |
J//return the Jaccard coefficient | |
//Dice coefficient | |
let diceCoeff (first : string list) ( second : string list ) = | |
let J = jaccardCoeff first second | |
2. * J / (1. + J) | |
//refer http://www.geo.arizona.edu/Antevs/ecol438/simindex.html (Simple Matching) | |
let simpleMatchingCoeff (first : string list) ( second : string list )= | |
let all = Set.union (first |> Set.ofList) (second |> Set.ofList) |> Set.toList | |
let firstMatches = all |> List.map (fun t -> first |> List.contains t ) | |
let secondMatches = all |> List.map (fun t -> second |> List.contains t ) | |
let zipped = List.zip firstMatches secondMatches | |
let M11 = zipped |> List.filter (fun t -> fst t = true && snd t = true) |> List.length | |
let M01 = zipped |> List.filter (fun t -> fst t = false && snd t = true) |> List.length | |
let M10 = zipped |> List.filter (fun t -> fst t = true && snd t = false) |> List.length | |
let M00 = zipped |> List.filter (fun t -> fst t = false && snd t = false) |> List.length | |
let numerator = M11 + M00 | |
let denominator = M10 + M01 - M11 + M00 | |
float numerator / float denominator | |
module similarity = | |
let private prod (a,b) = float a * float b | |
let private sqr x = float x * float x | |
let toPdf (h:int list)= | |
h |> List.map (fun t -> float t / float h.Length ) | |
let histToPdf (h:int list)= | |
let sum = h |> List.sum | |
h |> List.map (fun t -> float t / float sum) | |
let listToPdf (aList : int list)= | |
aList |> List.countBy (fun t -> t) | |
|> List.map snd | |
|> toPdf | |
let sorensen (p:int list)(q:int list) = | |
let zipped = List.zip (p |> toPdf ) (q|>toPdf) | |
let numerator = zipped |> List.sumBy (fun t -> float (fst t - snd t)) | |
let denominator = zipped |> List.sumBy (fun t -> float (fst t + snd t)) | |
numerator / denominator | |
//Euclidean distnace | |
let euclidean (p:int list)(q:int list) = | |
List.zip (p|>toPdf) (q |> toPdf)|> List.sumBy (fun t -> float (fst t - snd t) ** 2.) | |
let euclideanBy (p:int list)(q:int list) = | |
List.zip (p|>toPdf) (q |> toPdf)|> List.sumBy (fun t -> float (fst t - snd t) ** 2.) | |
//Cityblock distance | |
let cityBlock (p:int list)(q:int list) = | |
List.zip (p|>toPdf) (q |> toPdf)|> List.map (fun t -> float (fst t - snd t)) |> List.sum | |
//Chebyshev distance | |
let chebyshev(p:int list)(q:int list) = | |
List.zip (p|>toPdf) (q |> toPdf)|> List.map( fun t -> abs (fst t - snd t)) |> List.max | |
//Soergel Disance | |
let soergel (p:int list)(q:int list) = | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map(fun t -> abs( fst t - snd t)) |> List.sum | |
let denominator = zipped |> List.map (fun t -> max (fst t ) (snd t)) |> List.sum | |
float numerator / float denominator | |
let kulczynski_d (p:int list)(q:int list) = | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map(fun t -> abs( fst t - snd t) )|> List.sum | |
let denominator = zipped |> List.map (fun t -> min (fst t ) (snd t)) |> List.sum | |
float numerator / float denominator | |
//kulczynski_s is the reciprocal of kulczynski_d distance | |
let kulczynski_s (p:int list)(q:int list) = | |
1. / kulczynski_d p q | |
let canberra (p:int list)(q:int list) = | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map(fun t -> abs( fst t - snd t) )|> List.sum | |
let denominator = zipped |> List.map (fun t ->fst t + snd t) |> List.sum | |
float numerator / float denominator | |
// | |
let lorentzian (p:int list)(q:int list) = | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> Operators.log ( 1. + float( abs (fst t - snd t )))) | |
let intersection(p:int list ) (q: int list) = | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> min (fst t) (snd t)) |> List.sum | |
let waveHedges (p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map ( fun t -> 1. - float( min (fst t) (snd t)) | |
/ float (max (fst t) (snd t))) | |
|> List.sum | |
//For color image retrieval | |
let czekanowski(p:int list)(q:int list) = | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = 2. * (zipped |> List.map (fun t -> min (fst t) (snd t)) | |
|> List.sum | |
|> float) | |
let denominator = zipped |> List.map (fun t -> fst t + snd t) |> List.sum |> float | |
numerator / denominator | |
let gower(p:int list)(q:int list)= | |
//I love this. Free flowing fluid conversion | |
//rather than cramping abs and fst t - snd t in a single line | |
let numerator = List.zip (p|>toPdf) (q |> toPdf)|> List.map (fun t -> fst t - snd t) | |
|> List.map (fun z -> abs z) | |
|> List.map float | |
|> List.sum | |
|> float | |
let denominator = float p.Length | |
numerator / denominator | |
let motyka(p:int list)(q:int list)= | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map (fun t -> min (fst t) (snd t)) | |
|> List.sum |> float | |
let denominator = zipped |> List.map (fun t -> fst t + snd t) | |
|> List.sum |> float | |
numerator / denominator | |
let jaccardPoint(p:int list)(q:int list)= | |
0.0 | |
let dicePoint(p:int list)(q:int list)= | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map (fun t -> fst t * snd t) | |
|> List.sum | |
|> float | |
let denominator = (p |> List.map sqr |> List.sum |> float) + | |
(q |> List.map sqr |> List.sum |> float) | |
numerator / denominator | |
let ruzicka (p:int list) (q:int list) = | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map (fun t -> min (fst t) (snd t)) | |
|> List.sum |> float | |
let denominator = zipped |> List.map (fun t -> max (fst t) (snd t)) | |
|> List.sum |> float | |
numerator / denominator | |
let kumarHassebrook (p:int list) (q:int list) = | |
let sqr x = x * x | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let numerator = zipped |> List.map (fun t -> fst t * snd t ) | |
|> List.sum |> float | |
let denominator = (p |> List.map sqr |> List.sum |> float ) + | |
(q |> List.map sqr |> List.sum |> float ) - numerator | |
numerator / denominator | |
let innerProduct(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.sumBy (fun t -> fst t * snd t) | |
let harmonicMean(p:int list)(q:int list)= | |
2. * (List.zip (p|>toPdf) (q |> toPdf) | |
|> List.sumBy (fun t -> ( fst t * snd t )/ | |
(fst t + snd t))) | |
//document text | |
let cosineSimilarity(p:int list)(q:int list)= | |
let zipped = List.zip p q //(p|>toPdf) (q |> toPdf) | |
let prod (x,y) = float x * float y | |
let numerator = zipped |> List.map prod |> List.sum | |
let denominator = sqrt ( p|> List.map sqr |> List.sum |> float) * | |
sqrt ( q|> List.map sqr |> List.sum |> float) | |
numerator / denominator | |
let fidelity(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> float (fst t) * float (snd t)) | |
|> List.map sqrt | |
|> List.sum | |
let bhattacharya(p:int list)(q:int list)= | |
-log (fidelity p q) | |
let squaredEuclidean (p:int list)(q:int list)= | |
let toSquare x = x * x | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.sumBy (fun t-> (fst t - snd t) ** 2.0) | |
let hellinger(p:int list)(q:int list)= | |
let prod (a,b) = float a * float b | |
let product = List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map prod | |
|> List.map sqrt | |
|> List.sum | |
let right = 1. - product | |
2. * right |> abs//taking this off will result in NaN | |
|> float | |
|> sqrt | |
//image color image retr - refer http://www.autom.teithe.gr/gr/drastiriotites/impro3Dpos/papers/ants_SCIA2007.pdf | |
let matusita(p:int list)(q:int list)= | |
let prod (a,b) = float a * float b | |
let value =2. - 2. * | |
( List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map prod | |
|> List.map sqrt | |
|> List.sum) | |
value |> abs |> sqrt | |
let squarredChord(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> sqrt (fst t ) - sqrt (snd t)) | |
|> List.sum | |
let pearsonsChi(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> (fst t - snd t ) ** 2.0 / snd t) | |
|> List.sum | |
let neymanChi(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> (fst t - snd t ) ** 2.0 / fst t) | |
|> List.sum | |
let squaredChi(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> (fst t - snd t ) ** 2.0 / (fst t + snd t)) | |
|> List.sum | |
let probabilisticSymmetricChi(p:int list)(q:int list)= | |
2.0 * squaredChi p q | |
let divergence(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> (fst t - snd t) ** 2. / (fst t + snd t) ** 2.) | |
|> List.sum | |
let clark(p:int list)(q:int list)= | |
sqrt( List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> float( abs( fst t - snd t)) | |
/ (float (fst t + snd t))) | |
|> List.map (fun t -> t * t) | |
|> List.sum ) | |
let additiveSymmetricChi(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> (fst t - snd t ) ** 2. * (fst t + snd t) / (fst t * snd t)) | |
|> List.sum | |
let kullbackLeibler(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> float( fst t) * log (float (fst t )/float (snd t))) | |
|> List.sum | |
let topose (p:int list)(q:int list) = | |
let sum ( a , b ) = a + b | |
//Higher order functions in action! | |
let firstByBoth ( a , b) = float a / float (sum (a , b)) | |
let secondByBoth ( a , b)= float b / float (sum (a , b)) | |
let zipped = List.zip (p|> List.map (fun t -> float t / float p.Length)) | |
(q|> List.map (fun t -> float t / float p.Length)) | |
zipped |> List.map ( fun t -> float (fst t) * firstByBoth t + float (snd t) * secondByBoth t) | |
|> List.sum | |
//color image | |
let jeffreys(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> float( fst t - snd t) * log (float (fst t )/float (snd t))) | |
|> List.sum | |
let k_Divergence(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> fst t * log (2.0 * fst t) / (fst t + snd t)) | |
|> List.sum | |
let jensenShanon(p:int list)(q:int list)= | |
0.5 * topose p q | |
let jensenDifference (p:int list) (q:int list) = | |
let zipped = List.zip (p|>toPdf) (q |> toPdf) | |
let left = zipped |> List.map (fun t -> fst t * log (fst t) + snd t * log (snd t)) | |
|> List.map (fun t -> t / 2.) | |
let right = zipped |> List.map (fun t -> ((fst t + snd t) * 0.5 ) * log (0.5*(fst t + snd t))) | |
List.zip left right |> List.map (fun t -> fst t - snd t) |> List.sum | |
let taneja(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map ( fun t -> | |
float(fst t + snd t )/2.0 | |
* log ( float ( fst t + snd t)/ 2. * sqrt (fst t * snd t))) | |
|> List.sum | |
let kumarJohnson(p:int list)(q:int list)= | |
List.zip (p|>toPdf) (q |> toPdf) | |
|> List.map (fun t -> (fst t ** 2. - snd t ** 2.) ** 2. | |
/ (2. * (fst t * snd t) ** 1.5)) | |
|> List.sum | |
let jaccard (first : string list) (second:string list) = | |
let setOfFirst = first |> Set.ofList | |
let setOfSecond = second |> Set.ofList | |
match first.Length + second.Length with | |
| 0 -> 0.0 //If the union is zero there can't be any match | |
//But in set theory two empty sets are considered equal | |
//So we can return 0.0 to | |
| _ -> float (Set.intersect setOfFirst setOfSecond).Count / | |
float (Set.union setOfFirst setOfSecond).Count | |
//Find jaccard index for all the elements | |
let jaccardForAll (matchThis : string list) (aginst : (string list) list) = | |
aginst |> List.map ( fun t -> (t,jaccard matchThis t)) | |
|> List.sortByDescending (fun t -> snd t) | |
let jaccardDistance (first : string list) (second : string list) = | |
1. - jaccard first second | |
//A generic tversky index function | |
let tverskyIndexG (first :'a list)(second : 'a list) = | |
let X = Set.ofList first | |
let Y = Set.ofList second | |
let alpha = 0.34 | |
let beta = 0.42 | |
let I = Set.intersect X Y |> Set.count | |
let Diff = X - Y |> Set.count | |
let Diff2 = Y - X |> Set.count | |
float I / float (I + Diff + Diff2) | |
//Calculates tversky index for couple of lists | |
let tverskyIndex (first : string list) ( second : string list ) = | |
let X = Set.ofList first | |
let Y = Set.ofList second | |
let alpha = 0.34 | |
let beta = 0.42 | |
let I = Set.intersect X Y |> Set.count | |
let Diff = X - Y |> Set.count | |
let Diff2 = Y - X |> Set.count | |
float I / float (I + Diff + Diff2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment