Skip to content

Instantly share code, notes, and snippets.

@iha2
Created January 21, 2017 07:15
Show Gist options
  • Save iha2/b19d67e0130fbcf0a33f112f1771ce95 to your computer and use it in GitHub Desktop.
Save iha2/b19d67e0130fbcf0a33f112f1771ce95 to your computer and use it in GitHub Desktop.
/**
 * Hierarchical clustering by repeated pairwise merging: each round finds the pair of clusters
 * with the smallest distance, replaces the two with a single merged cluster, and the process
 * stops once only one cluster is left.
 *
 * @param clusters  the initial clusters; also the default input of [[start]]
 * @param distances distances keyed by a pair of cluster ids; handed unchanged to every
 *                  `computeDistances` call made from [[start]]
 */
class PureHiCluster(clusters: Vector[Cluster], distances: Map[(String, String), Double]) {

  /** Distances keyed by a pair of cluster ids. */
  type Distances = Map[(String, String), Double]

  /** Result of one distance calculation: a distance map together with a candidate pair. */
  type Calculation = (Distances, PairData)

  /**
   * Starts one asynchronous `computeDistances` call per cluster.
   *
   * The call for a given cluster receives that cluster together with the clusters that follow
   * it in `clusters` (the cluster itself is the first element of that suffix).
   * NOTE(review): presumably this is so each unordered pair is compared only once — confirm
   * against `computeDistances`.
   *
   * @param clusters  clusters to compare
   * @param distances distances forwarded to every `computeDistances` call
   * @param pairData  pair forwarded to every `computeDistances` call
   * @return the started futures, in reverse cluster order (the future of the last cluster first)
   */
  def generateClusterCalculations(
      clusters: Vector[Cluster],
      distances: Map[(String, String), Double],
      pairData: PairData
  ): List[Future[Calculation]] = {
    @scala.annotation.tailrec
    def loop(remaining: Vector[Cluster], started: List[Future[Calculation]]): List[Future[Calculation]] =
      remaining match {
        case current +: rest =>
          // `remaining` already begins with `current`, so it is passed as-is.
          val calculation = Future[Calculation] {
            computeDistances(remaining, distances, current, pairData)
          }
          loop(rest, calculation +: started)
        case _ =>
          started
      }

    loop(clusters, List.empty)
  }

  /**
   * Folds one calculation result into the running result.
   *
   * @param acc          distances and closest pair accumulated so far
   * @param futureResult distances and candidate pair produced by one calculation
   * @return the union of both distance maps (on a duplicate key the value from `acc` is kept)
   *         and whichever pair has the strictly smaller distance; on a tie the pair from `acc`
   *         is kept
   */
  def updateMostSimilar(
      acc: (Map[(String, String), Double], PairData),
      futureResult: (Map[(String, String), Double], PairData)
  ): (Map[(String, String), Double], PairData) = {
    val (knownDistances, bestSoFar) = acc
    val (newDistances, candidate) = futureResult
    // `++` lets the right-hand operand win on duplicate keys, so `acc` goes on the right.
    val mergedDistances = newDistances ++ knownDistances
    val closest = if (candidate.distance < bestSoFar.distance) candidate else bestSoFar
    (mergedDistances, closest)
  }

  /**
   * Merges clusters pairwise until a single cluster remains and returns it.
   *
   * Every merge creates a new cluster whose id is the current index rendered as a string; the
   * index is decremented after each merge. Progress and the final cluster are printed to
   * standard output.
   *
   * @param starterIndex index used as the id of the first merged cluster
   * @param clusters     clusters to merge; defaults to the clusters the instance was built with
   * @return the last remaining cluster; a single input cluster is returned unchanged (and
   *         nothing is printed)
   * @throws IllegalArgumentException if `clusters` is empty
   * @throws Exception if the two clusters named by the closest pair cannot both be found
   */
  def start(starterIndex: Int, clusters: Vector[Cluster] = clusters): Cluster = {
    require(clusters.nonEmpty, "cannot build a hierarchy from an empty set of clusters")

    // Local helper so the recursion can be checked as tail-recursive; `start` itself is
    // overridable and therefore never optimised by the compiler.
    // Invariant: `remaining` always holds at least two clusters.
    @scala.annotation.tailrec
    def mergeClosest(index: Int, remaining: Vector[Cluster]): Cluster = {
      val first = remaining(0)
      val second = remaining(1)
      // The first two clusters serve as the initial "closest pair" candidate.
      val seedPair =
        new PairData((first.id, second.id), Distance.pearsonCorrelationScore(first.vec, second.vec))
      val calculations = generateClusterCalculations(remaining, distances, seedPair)
      // Here I'm assuming all the futures will return successfully since they are simply calculations
      val outcomes = Await.result(Future.sequence(calculations), 600.seconds)
      // NOTE(review): the merged distance map produced by this fold is discarded; every round
      // passes the constructor's `distances` again. Confirm whether it was meant to be reused.
      val (_, closestPair) =
        outcomes.foldLeft((Map.empty[(String, String), Double], seedPair)) { (best, outcome) =>
          updateMostSimilar(best, outcome)
        }

      val nearest = remaining.filter { candidate =>
        candidate.id == closestPair.clusterSetId._1 || candidate.id == closestPair.clusterSetId._2
      }
      if (nearest.length != 2) {
        throw new Exception("the closest neighbour clusters were not found ")
      } else {
        val left = nearest(0)
        val right = nearest(1)
        val mergedVector = left.vec.merge(right.vec)
        val newCluster = Cluster(mergedVector, left, right, closestPair.distance, index.toString)
        // Computed once instead of once per filtered element.
        val mergedIds = nearest.map(_.id)
        val next = newCluster +: remaining.filterNot(candidate => mergedIds.contains(candidate.id))
        if (next.length == 1) {
          println(next.head)
          next.head
        } else {
          println(s"${next.length} remaining .....")
          mergeClosest(index - 1, next)
        }
      }
    }

    if (clusters.length == 1) clusters.head
    else mergeClosest(starterIndex, clusters)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment