Skip to content

Instantly share code, notes, and snippets.

@animeshtrivedi
Last active January 11, 2017 12:59
Show Gist options
  • Save animeshtrivedi/8fab18a325a5817a11437a5f6f7437f3 to your computer and use it in GitHub Desktop.
Save animeshtrivedi/8fab18a325a5817a11437a5f6f7437f3 to your computer and use it in GitHub Desktop.
AtrFileFormat, which creates a null-sink writer (AtrOutputWriter)
package org.apache.spark.sql.execution.datasources.atr
import scala.tools.jline_embedded.internal.Log

import org.apache.hadoop.fs.FileStatus
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}

import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriter, OutputWriterFactory}
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.types.StructType
/**
 * A write-only "null sink" Spark SQL data source, registered under the short
 * name `"atr"` (usable as `df.write.format("atr")`).
 *
 * The read path is unsupported: [[inferSchema]] always returns `None`.
 * The write path hands every task an [[AtrOutputWriter]], which (per the gist
 * description) discards the rows it receives — useful for benchmarking the
 * write pipeline without actual I/O cost.
 *
 * Created by atr on 16.12.16.
 */
class AtrFileFormat extends FileFormat
  with DataSourceRegister
  with Logging
  with Serializable {

  /** Alias used by `DataFrameWriter.format("atr")` / `DataFrameReader.format("atr")`. */
  override def shortName(): String = "atr"

  override def toString: String = "atr"

  /** Reading is not supported, so no schema can ever be inferred. */
  override def inferSchema(
      sparkSession: SparkSession,
      options: Map[String, String],
      files: Seq[FileStatus]): Option[StructType] = None

  /**
   * Returns a factory that Spark serializes to executors; each write task
   * calls `newInstance` once to obtain its per-partition writer.
   */
  override def prepareWrite(
      sparkSession: SparkSession,
      job: Job,
      options: Map[String, String],
      dataSchema: StructType): OutputWriterFactory = {
    new OutputWriterFactory {
      override def newInstance(
          path: String,
          dataSchema: StructType,
          context: TaskAttemptContext): OutputWriter = {
        // Fix: the original logged via scala.tools.jline_embedded.internal.Log,
        // a scala-compiler/REPL-internal API that is absent from a normal
        // application classpath. Spark's own Logging trait (accessible here
        // because this file lives inside the org.apache.spark package) is the
        // appropriate mechanism.
        logInfo(s"allocating a new Atr(NULL)OutputWriter to file: $path")
        new AtrOutputWriter(path, context)
      }

      override def getFileExtension(context: TaskAttemptContext): String = ".atr"
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment