-
-
Save AndriiStefaniv/902938c71440c3f25769528e5c730d4f to your computer and use it in GitHub Desktop.
/** Contract for something that can be run with a Spark reader and a Spark session. */
trait Processor {
  /**
   * Runs this processor.
   *
   * @param oraOptionDfConfig `DataFrameReader` supplied by the caller
   * @param sparkSession      `SparkSession` supplied by the caller
   */
  def process(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit
}
/** [[Processor]] implementation that, for now, only prints a marker line. */
class BMValsProcessor extends Processor {
  /**
   * Prints "in BMValsProcessor". Neither argument is read by the current body.
   *
   * @param oraOptionDfConfig `DataFrameReader` supplied by the caller (unused here)
   * @param sparkSession      `SparkSession` supplied by the caller (unused here)
   */
  override def process(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit = {
    println("in BMValsProcessor") // actual business logic goes here
  }
}
/** Second [[Processor]] implementation; it also only prints a marker line for now. */
class BMValsProcessor2 extends Processor {
  /**
   * Prints "in BMValsProcessor2". Neither argument is read by the current body.
   *
   * @param oraOptionDfConfig `DataFrameReader` supplied by the caller (unused here)
   * @param sparkSession      `SparkSession` supplied by the caller (unused here)
   */
  override def process(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit = {
    println("in BMValsProcessor2") // actual business logic goes here
  }
}
/** Entry point that collects every registered processor function and invokes each one. */
object ValProcessor {

  /**
   * Calls every function returned by `getAllDefinedProcessors()` with the same reader and session.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): both values are null stand-ins for real initialization. Any processor that
    // dereferences them will throw a NullPointerException, so replace them before adding logic.
    val df: DataFrameReader = null // initialization of DataFrameReader
    val spark: SparkSession = null // initialization of SparkSession
    val procs: Map[String, (DataFrameReader, SparkSession) => Unit] = getAllDefinedProcessors()
    procs.values.foreach(proc => proc(df, spark))
  }

  /**
   * Builds the registry of processor functions.
   *
   * Writing `.process` without an argument list yields the method as a function value instead
   * of calling it, which is why each map value has type `(DataFrameReader, SparkSession) => Unit`.
   *
   * @return map from registry key to the `process` method of a freshly created processor
   */
  def getAllDefinedProcessors(): Map[String, (DataFrameReader, SparkSession) => Unit] = {
    Map(
      "bm_vals" -> new BMValsProcessor().process,
      "bm_vals2" -> new BMValsProcessor2().process
    )
  }
}
-
new BMValsProcessor().process
is a reference to a function. The function takes (DataFrameReader, SparkSession)
and returns Unit.
So its type is (DataFrameReader, SparkSession) => Unit
-
() => (new ProcessorOne()).process
is a function. It takes no arguments, (),
and returns the function from item 1.
So its type is () => (DataFrameReader, SparkSession) => Unit
-
new ProcessorOne().process()
is a function call. If you write .process()
then you call the function with no arguments. If you write .process
then you get a reference to the function.
You could learn more about this from Official docs or Scala exercises
/** Contract with two operations, each taking a `DataFrameReader` and a `SparkSession`. */
trait Processor {
/** First of the two operations every processor must implement; returns nothing. */
def processOne(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit
/** Second of the two operations every processor must implement; returns nothing. */
def processTwo(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit
}
/** [[Processor]] whose two operations each just print a marker line for now. */
class BMValsProcessor extends Processor {

  /** Prints the `processOne` marker; both arguments are currently ignored. */
  override def processOne(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor processOne ") // actual business logic goes here

  /** Prints the `processTwo` marker; both arguments are currently ignored. */
  override def processTwo(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor processTwo ") // actual business logic goes here
}
/** Second [[Processor]]; each operation just prints its own marker line for now. */
class BMValsProcessor2 extends Processor {

  /** Prints the `processOne` marker; both arguments are currently ignored. */
  override def processOne(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor2 processOne") // actual business logic goes here

  /** Prints the `processTwo` marker; both arguments are currently ignored. */
  override def processTwo(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor2 processTwo") // actual business logic goes here
}
/** Entry point: builds the function registry and runs every registered function once. */
object ValProcessor {

  /**
   * Invokes each registered function with the same reader and session.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    val reader: DataFrameReader = null // initialization of DataFrameReader
    val session: SparkSession = null // initialization of SparkSession
    val registry: Map[String, (DataFrameReader, SparkSession) => Unit] = getAllDefinedProcessors()
    for (handler <- registry.values) handler(reader, session)
  }

  /**
   * Builds the registry of processor functions.
   *
   * Only `processOne` of each processor is registered here; `processTwo` is not reachable
   * through this map.
   *
   * @return map from registry key to a function that forwards to that processor's `processOne`
   */
  def getAllDefinedProcessors(): Map[String, (DataFrameReader, SparkSession) => Unit] = {
    // Each processor is created once, when the registry is built, not on every invocation.
    val first = new BMValsProcessor()
    val second = new BMValsProcessor2()
    Map(
      ("bm_vals", (r: DataFrameReader, s: SparkSession) => first.processOne(r, s)),
      ("bm_vals2", (r: DataFrameReader, s: SparkSession) => second.processOne(r, s))
    )
  }
}
You could try something like
/** Contract with two operations, each taking a `DataFrameReader` and a `SparkSession`. */
trait Processor {
/** First of the two operations every processor must implement; returns nothing. */
def processOne(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit
/** Second of the two operations every processor must implement; returns nothing. */
def processTwo(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit
}
/** [[Processor]] whose two operations each just print a marker line for now. */
class BMValsProcessor extends Processor {

  /** Prints the `processOne` marker; both arguments are currently ignored. */
  override def processOne(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor processOne ") // actual business logic goes here

  /** Prints the `processTwo` marker; both arguments are currently ignored. */
  override def processTwo(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor processTwo ") // actual business logic goes here
}
/** Second [[Processor]]; each operation just prints its own marker line for now. */
class BMValsProcessor2 extends Processor {

  /** Prints the `processOne` marker; both arguments are currently ignored. */
  override def processOne(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor2 processOne") // actual business logic goes here

  /** Prints the `processTwo` marker; both arguments are currently ignored. */
  override def processTwo(oraOptionDfConfig: DataFrameReader, sparkSession: SparkSession): Unit =
    println("in BMValsProcessor2 processTwo") // actual business logic goes here
}
/** Entry point: builds the processor registry and runs both operations of every processor. */
object ValProcessor {

  /**
   * For each registered processor, calls `processOne` and then `processTwo` with the same
   * reader and session.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    val reader: DataFrameReader = null // initialization of DataFrameReader
    val session: SparkSession = null // initialization of SparkSession
    val registry: Map[String, Processor] = getAllDefinedProcessors()
    for (processor <- registry.values) {
      processor.processOne(reader, session)
      processor.processTwo(reader, session)
    }
  }

  /**
   * Builds the registry of processor instances, so callers can reach every method of each one.
   *
   * @return map from registry key to a freshly created processor
   */
  def getAllDefinedProcessors(): Map[String, Processor] = {
    val registry: Map[String, Processor] = Map(
      ("bm_vals", new BMValsProcessor()),
      ("bm_vals2", new BMValsProcessor2())
    )
    registry
  }
}
Thank you so much, sir, you made my day. Sir, I am confused about one aspect: earlier this worked: https://gist.github.com/shatestest/1e2a1eac4de10199a1fbc693a7d48c09
Now I am confused about when to use
1) "new BMValsProcessor().process"
2) "() => (new ProcessorOne()).process"
3) Why not "new ProcessorOne().process()"? I am really confused ... please help me understand the difference, sir.