@jomoespe
Created February 14, 2019 07:28
Snippet of a Spark job to merge Parquet files, also removing duplicates
val partitions = 5 // this value depends on data and volumes; it will be different in every case
val df = spark.read.parquet("URI://path/to/parquet/files/")
df.createOrReplaceTempView("df")
val df_output = spark
  .sql("SELECT DISTINCT * FROM df") // this removes duplicates. If it's not needed, simply remove this line
  .coalesce(partitions)
df_output.write.parquet("URI://path/to/destination")
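
The same merge can also be expressed with the DataFrame API alone, without registering a temporary view: dropDuplicates() with no arguments deduplicates on all columns, just like SELECT DISTINCT *. A minimal, self-contained sketch, assuming Spark 2.x or later; the application name and paths are placeholders, and the overwrite mode is an assumption about how the destination should be handled:

import org.apache.spark.sql.{SaveMode, SparkSession}

object MergeParquet {
  def main(args: Array[String]): Unit = {
    // Placeholder app name; adjust for the actual job.
    val spark = SparkSession.builder().appName("merge-parquet").getOrCreate()

    val partitions = 5 // tune to the data volume, as in the snippet above

    spark.read.parquet("URI://path/to/parquet/files/")
      .dropDuplicates()          // same effect as SELECT DISTINCT *; remove if not needed
      .coalesce(partitions)      // merge into fewer, larger output files
      .write
      .mode(SaveMode.Overwrite)  // assumption: replace the destination if it already exists
      .parquet("URI://path/to/destination")

    spark.stop()
  }
}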