Skip to content

Instantly share code, notes, and snippets.

@anthonynsimon
Created March 6, 2018 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anthonynsimon/351ff8122c63fc492fcc396d904fb012 to your computer and use it in GitHub Desktop.
Save anthonynsimon/351ff8122c63fc492fcc396d904fb012 to your computer and use it in GitHub Desktop.
Sorted Merge Join in both FP and OOP
import scala.collection.mutable
object SortedRelation {
def apply[A, B](seq: Seq[A], key: (A) => B)(implicit ordering: Ordering[B]): SortedRelation[A, B] = new SortedRelation(seq, key)
}
class SortedRelation[A, B](val seq: Seq[A], val key: (A) => B)(implicit ordering: Ordering[B]) {
private val sorted = seq.sortBy(key)
private var pointer = 0
private val len = seq.length
def advance(): Unit = pointer += 1
def hasRemaining: Boolean = pointer < len
def hasNext: Boolean = pointer + 1 < len
def peekNext(): A = sorted(pointer + 1)
def peek(): A = sorted(pointer)
def reset(): Unit = pointer = 0
}
def join[A, B, C](leftRelation: SortedRelation[A, C], rightRelation: SortedRelation[B, C])(implicit ordering: Ordering[C]): Seq[(A, B)] = {
val result: mutable.ListBuffer[(A, B)] = mutable.ListBuffer.empty[(A, B)]
def isMatch(left: A, right: B): Boolean = ordering.equiv(leftRelation.key(left), rightRelation.key(right))
while (leftRelation.hasRemaining && rightRelation.hasRemaining) {
val left = leftRelation.peek()
val right = rightRelation.peek()
if (isMatch(left, right)) {
result += ((left, right))
if (leftRelation.hasNext && isMatch(leftRelation.peekNext(), right)) {
leftRelation.advance()
} else {
rightRelation.advance()
}
} else if (ordering.lt(leftRelation.key(left), rightRelation.key(right))) {
leftRelation.advance()
} else {
rightRelation.advance()
}
}
result
}
// Assumes both relations are already sorted
def join[A, B, C](leftRelation: Seq[A], rightRelation: Seq[B], leftKey: A => C, rightKey: B => C)(implicit ordering: Ordering[C]): Seq[(A, B)] = {
def isMatch(left: A, right: B): Boolean = ordering.equiv(leftKey(left), rightKey(right))
// Pick first element of both relations if exists
(leftRelation.headOption, rightRelation.headOption) match {
// Both relations have remaining elements, try to join
case (Some(left), Some(right)) => {
// Both heads match on join key
if (isMatch(left, right)) {
val result = Seq((left, right))
// Try to join the remaining elements of the relations
val restJoin = leftRelation.tail.headOption match {
// Lookahead on the left
case Some(nextLeft) => {
// Left side has a next element
if (isMatch(nextLeft, right)) {
// If next left matches on join key with current right,
// then advance left relation only
join(leftRelation.tail, rightRelation, leftKey, rightKey)
} else {
// Otherwise advance right side and leave left relation at current head
join(leftRelation, rightRelation.tail, leftKey, rightKey)
}
}
// Left side has no next element, no join possible from remaining elements in relations
case _ => Seq.empty
}
// Result is current heads plus rest of relations joined
result ++ restJoin
} else if (ordering.lt(leftKey(left), rightKey(right))) {
join(leftRelation.tail, rightRelation, leftKey, rightKey)
} else {
join(leftRelation, rightRelation.tail, leftKey, rightKey)
}
}
// One or both relations have no elements, no join possible so result is empty
case _ => Seq.empty
}
}
case class Restaurant(name: String, city: String)
case class City(name: String, country: String)
val restaurants = Seq(
Restaurant("Dudu", "Munich"),
Restaurant("Prime 51", "Detroit"),
Restaurant("The Kitchen", "Ko Lanta"),
Restaurant("La Bruja de Escazu", "San Jose"),
Restaurant("McDonald's", "Tokyo"),
)
val cities = Seq(
City("Tokyo", "Japan"),
City("Osaka", "Japan"),
City("Rome", "Italy"),
City("Munich", "Germany"),
City("Detroit", "USA"),
City("San Jose", "Costa Rica"),
City("Stuttgart", "Germany"),
)
val restaurantsRelation = SortedRelation[Restaurant, String](restaurants, _.city)
val citiesRelation = SortedRelation[City, String](cities, _.name)
join[Restaurant, City, String](restaurantsRelation, citiesRelation).foreach(println)
join[Restaurant, City, String](restaurants.sortBy(_.city), cities.sortBy(_.name), (x: Restaurant) => x.city, (x: City) => x.name).foreach(println)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment