View override committer
def main(args: Array[String]): Unit = { | |
val sc = SparkSession | |
.builder() | |
.appName(JOB_NAME) | |
.getOrCreate() | |
.sparkContext | |
val ssc = new StreamingContext(sc, Seconds(BATCH_DURATION_SECONDS)) |
View write javadoc
object SparkHadoopWriter extends Logging { | |
… … | |
/** | |
* Basic work flow of this command is: | |
* 1. Driver side setup, prepare the data source and hadoop configuration for the write job to | |
* be issued. | |
* 2. Issues a write job consisting of one or more executor side tasks, each of which writes all | |
* rows within an RDD partition. | |
* 3. If no exception is thrown in a task, commits that task, otherwise aborts that task; If any |
View save to hadoop
// Pass every record through someFunc (per the inline note, this drops the timestamp)
// and save the result with saveAsHadoopFile at the path held in datumBasePath.
recordsRDD | |
.map(x => someFunc(x)) //drop the timestamp | |
.saveAsHadoopFile(s"$datumBasePath") |
View FileOutputCommitter.java
package com.outbrain.datainfra.jester; | |
import org.apache.commons.logging.Log; | |
import org.apache.commons.logging.LogFactory; | |
import org.apache.hadoop.classification.InterfaceAudience; | |
import org.apache.hadoop.classification.InterfaceAudience.Private; | |
import org.apache.hadoop.classification.InterfaceStability; | |
import org.apache.hadoop.fs.FileStatus; | |
import org.apache.hadoop.fs.FileSystem; | |
import org.apache.hadoop.fs.Path; |
View GcpCreateNotification.java
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; | |
import com.google.api.client.http.javanet.NetHttpTransport; | |
import com.google.api.client.json.jackson.JacksonFactory; | |
import com.google.api.services.storage.Storage; | |
import com.google.api.services.storage.StorageScopes; | |
import com.google.api.services.storage.model.Notification; | |
import com.google.cloud.storage.Bucket; | |
import com.google.gson.JsonObject; | |
import java.io.FileInputStream; | |
import java.util.Collections; |