Skip to content

Instantly share code, notes, and snippets.

@MishaelRosenthal
Last active August 29, 2015 13:57
Show Gist options
  • Save MishaelRosenthal/9740199 to your computer and use it in GitHub Desktop.
Save MishaelRosenthal/9740199 to your computer and use it in GitHub Desktop.
package com.liveperson.predictivedialer.examples.misc
/**
* Created with IntelliJ IDEA.
* User: mishaelr
* Date: 3/23/14
* Time: 11:50 AM
*
* A two pass implementation
* http://www.cs.yale.edu/homes/el327/datamining2011aFiles/ASimpleAlgorithmForFindingFrequentElementsInStreamsAndBags.pdf
*/
object FindingFrequentElements {
implicit class TwoPassFindingFrequentElements[T](trav: Iterable[T]){
def findFrequent(theta: Double) = {
val (superSet, travSize) = trav.foldLeft((Map[T, Int](), 0)){case ((acc, i), x) =>
val newAcc = if(acc.contains(x))
acc.updated(x, acc(x) + 1)
else {
val updated = acc + (x -> 1)
if(updated.size > 1/theta)
updated.mapValues(_ - 1).filter(_._2 > 0)
else
updated
}
(newAcc, i + 1)
}
val travFiltered = trav.view.filter(superSet.contains)
val secondPass = travFiltered.foldLeft(superSet.mapValues(_ => 0)){case (acc, x) => acc.updated(x, acc(x) + 1)}
secondPass.filter{case(_, count) => count >= travSize * theta}.keySet
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment