@saisgit
Last active May 19, 2020 14:59
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object Solution extends App {
  val conf = new SparkConf().setAppName("Problem_Execution")
  val sc = new SparkContext(conf)
  val hiveContext = new HiveContext(sc)

  // Read the CSV file (with a header row) using the spark-csv package
  val readData = hiveContext.read
    .format("com.databricks.spark.csv")
    .option("header", "true")
    .load("/user/sai/file.csv")
  readData.registerTempTable("readTable")

  // Rename the columns and strip the leading "$" from the salary field
  val formattedData = hiveContext.sql(
    "select Name, `Position Title` as Title, Department, " +
      "substring(`Employee Annual Salary`, 2) as Salary from readTable")
  formattedData.registerTempTable("formattedTable")

  // Rank employees within each department by salary and keep ranks 1 and 2
  val solution = hiveContext.sql(
    "select Name, Title, Department, Salary, rank from " +
      "(select Name, Title, Department, Salary, " +
      "dense_rank() over (partition by Department order by Salary) as rank " +
      "from formattedTable) temp where rank <= 2")

  // Write a single pipe-delimited output file with a header row
  solution.coalesce(1).write.mode("overwrite").format("com.databricks.spark.csv")
    .option("header", "true").option("delimiter", "|").save("/user/sai/output.csv")
}
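
The same top-two-per-department logic can also be written with the DataFrame window API instead of a SQL subquery. The sketch below is a minimal example, assuming Spark 1.6.x, where dense_rank() is available in org.apache.spark.sql.functions and window functions work on a HiveContext; it reuses the formattedData DataFrame from above. Note that Salary is still a string column at this point, so the ordering is lexicographic, exactly as in the SQL version.

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, dense_rank}

// Partition by department and order by salary, mirroring the SQL window clause
val byDept = Window.partitionBy("Department").orderBy(col("Salary"))

// Add a dense rank per department and keep the two lowest-ranked rows
val ranked = formattedData.withColumn("rank", dense_rank().over(byDept))
val topTwoPerDept = ranked.filter(col("rank") <= 2)

To run the original job, the spark-csv package must be on the classpath, for example by submitting with spark-submit --packages com.databricks:spark-csv_2.10:1.5.0 (the exact artifact version depends on your Spark and Scala build).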