Skip to content

Instantly share code, notes, and snippets.

@eric-maynard
Last active March 20, 2018 16:03
Show Gist options
  • Save eric-maynard/137bae453b5af7ac08c6d1c73acea944 to your computer and use it in GitHub Desktop.
Save eric-maynard/137bae453b5af7ac08c6d1c73acea944 to your computer and use it in GitHub Desktop.
//command line args:
// Number of hash buckets; used as the modulus when a table name is hashed.
val buckets: Int = 5
// Bucket index that method 2 compares each table's hash against (unused by method 1).
val myBucket: Int = 1
//helper functions
/**
 * A table name paired with the bucket hash computed for it.
 *
 * @param tableName name of the table
 * @param hash      bucket value derived from the table name
 */
case class TableWithHash(
  tableName: String,
  hash: Int
)
/**
 * Assigns a table to a bucket based on the hash code of its name.
 *
 * `Math.floorMod` is used instead of `%` because `String.hashCode` may be negative;
 * `%` would then produce a negative bucket, which can never match a non-negative
 * `myBucket` and would create extra groups beyond `buckets`.
 *
 * @param tableName name of the table to bucket
 * @return the table name together with its bucket index, in `[0, buckets)` when
 *         `buckets` is positive
 */
def hashTable(tableName: String): TableWithHash =
  TableWithHash(tableName, Math.floorMod(tableName.hashCode, buckets))
//filler functions:
/**
 * Filler implementation: returns a fixed list of table names.
 *
 * @return the hard-coded names "foo", "bar" and "baz", in that order
 */
def getAllTables(): Seq[String] =
  Seq("foo", "bar", "baz")
/**
 * Filler implementation: prints the placeholder value "-1" to standard output.
 *
 * @param tableName table whose count would be printed; not read by this placeholder
 */
def printCount(tableName: String): Unit =
  Console.println("-1")
/**
 * Filler implementation: shells out to a placeholder `spark-submit` command.
 *
 * NOTE(review): `tables` is not read here; the word "tables" inside the command
 * string is literal text, not an interpolation of the argument.
 * NOTE(review): despite the name, `!!` waits for the process to finish before
 * returning; its captured output is discarded.
 *
 * @param tables the tables of one bucket that the job is meant to count
 */
def countInBackground(tables: Seq[TableWithHash]): Unit = {
  import sys.process._
  val submitCommand = "spark-submit . . . Somehow.Count tables"
  Process(submitCommand).!!
}
//method 1:
{
  // Hash every table, group the results by bucket, and hand each
  // bucket's tables to one countInBackground call.
  val tablesByBucket = getAllTables().map(hashTable).groupBy(_.hash)
  tablesByBucket.foreach { case (_, bucketTables) =>
    countInBackground(bucketTables)
  }
}
//method 2:
{
  // Hash every table, keep only those whose bucket equals myBucket,
  // and print the count for each of them.
  getAllTables()
    .map(hashTable)
    .filter(_.hash == myBucket)
    .foreach(entry => printCount(entry.tableName))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment