Created
March 6, 2018 16:50
-
-
Save anthonynsimon/351ff8122c63fc492fcc396d904fb012 to your computer and use it in GitHub Desktop.
Sorted Merge Join in both FP and OOP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.mutable | |
object SortedRelation { | |
def apply[A, B](seq: Seq[A], key: (A) => B)(implicit ordering: Ordering[B]): SortedRelation[A, B] = new SortedRelation(seq, key) | |
} | |
class SortedRelation[A, B](val seq: Seq[A], val key: (A) => B)(implicit ordering: Ordering[B]) { | |
private val sorted = seq.sortBy(key) | |
private var pointer = 0 | |
private val len = seq.length | |
def advance(): Unit = pointer += 1 | |
def hasRemaining: Boolean = pointer < len | |
def hasNext: Boolean = pointer + 1 < len | |
def peekNext(): A = sorted(pointer + 1) | |
def peek(): A = sorted(pointer) | |
def reset(): Unit = pointer = 0 | |
} | |
def join[A, B, C](leftRelation: SortedRelation[A, C], rightRelation: SortedRelation[B, C])(implicit ordering: Ordering[C]): Seq[(A, B)] = { | |
val result: mutable.ListBuffer[(A, B)] = mutable.ListBuffer.empty[(A, B)] | |
def isMatch(left: A, right: B): Boolean = ordering.equiv(leftRelation.key(left), rightRelation.key(right)) | |
while (leftRelation.hasRemaining && rightRelation.hasRemaining) { | |
val left = leftRelation.peek() | |
val right = rightRelation.peek() | |
if (isMatch(left, right)) { | |
result += ((left, right)) | |
if (leftRelation.hasNext && isMatch(leftRelation.peekNext(), right)) { | |
leftRelation.advance() | |
} else { | |
rightRelation.advance() | |
} | |
} else if (ordering.lt(leftRelation.key(left), rightRelation.key(right))) { | |
leftRelation.advance() | |
} else { | |
rightRelation.advance() | |
} | |
} | |
result | |
} | |
// Assumes both relations are already sorted | |
def join[A, B, C](leftRelation: Seq[A], rightRelation: Seq[B], leftKey: A => C, rightKey: B => C)(implicit ordering: Ordering[C]): Seq[(A, B)] = { | |
def isMatch(left: A, right: B): Boolean = ordering.equiv(leftKey(left), rightKey(right)) | |
// Pick first element of both relations if exists | |
(leftRelation.headOption, rightRelation.headOption) match { | |
// Both relations have remaining elements, try to join | |
case (Some(left), Some(right)) => { | |
// Both heads match on join key | |
if (isMatch(left, right)) { | |
val result = Seq((left, right)) | |
// Try to join the remaining elements of the relations | |
val restJoin = leftRelation.tail.headOption match { | |
// Lookahead on the left | |
case Some(nextLeft) => { | |
// Left side has a next element | |
if (isMatch(nextLeft, right)) { | |
// If next left matches on join key with current right, | |
// then advance left relation only | |
join(leftRelation.tail, rightRelation, leftKey, rightKey) | |
} else { | |
// Otherwise advance right side and leave left relation at current head | |
join(leftRelation, rightRelation.tail, leftKey, rightKey) | |
} | |
} | |
// Left side has no next element, no join possible from remaining elements in relations | |
case _ => Seq.empty | |
} | |
// Result is current heads plus rest of relations joined | |
result ++ restJoin | |
} else if (ordering.lt(leftKey(left), rightKey(right))) { | |
join(leftRelation.tail, rightRelation, leftKey, rightKey) | |
} else { | |
join(leftRelation, rightRelation.tail, leftKey, rightKey) | |
} | |
} | |
// One or both relations have no elements, no join possible so result is empty | |
case _ => Seq.empty | |
} | |
} | |
case class Restaurant(name: String, city: String) | |
case class City(name: String, country: String) | |
val restaurants = Seq( | |
Restaurant("Dudu", "Munich"), | |
Restaurant("Prime 51", "Detroit"), | |
Restaurant("The Kitchen", "Ko Lanta"), | |
Restaurant("La Bruja de Escazu", "San Jose"), | |
Restaurant("McDonald's", "Tokyo"), | |
) | |
val cities = Seq( | |
City("Tokyo", "Japan"), | |
City("Osaka", "Japan"), | |
City("Rome", "Italy"), | |
City("Munich", "Germany"), | |
City("Detroit", "USA"), | |
City("San Jose", "Costa Rica"), | |
City("Stuttgart", "Germany"), | |
) | |
val restaurantsRelation = SortedRelation[Restaurant, String](restaurants, _.city) | |
val citiesRelation = SortedRelation[City, String](cities, _.name) | |
join[Restaurant, City, String](restaurantsRelation, citiesRelation).foreach(println) | |
join[Restaurant, City, String](restaurants.sortBy(_.city), cities.sortBy(_.name), (x: Restaurant) => x.city, (x: City) => x.name).foreach(println) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment