Created
January 18, 2021 10:50
-
-
Save gaborbarna/00f0ee4bb7bbb74837d90f3f453c48e6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 50967.0 failed 30 times, most recent failure: Lost task 2.29 in stage 50967.0 (TID 611666, ip-10-134-0-32.us-west-2.compute.internal, executor 2): java.lang.NullPointerException | |
at java.util.concurrent.ConcurrentHashMap.putVal(ConcurrentHashMap.java:1011) | |
at java.util.concurrent.ConcurrentHashMap.putAll(ConcurrentHashMap.java:1084) | |
at org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider.getStore(HDFSBackedStateStoreProvider.scala:204) | |
at org.apache.spark.sql.execution.streaming.state.StateStore$.get(StateStore.scala:371) | |
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:88) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:123) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
at java.lang.Thread.run(Thread.java:748) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2041) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2029) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2028) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2028) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:966) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:966) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:966) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2262) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2211) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2200) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:777) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126) | |
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:935) | |
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:933) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:933) | |
at org.apache.spark.sql.kinesis.KinesisWriter$$anonfun$write$1.apply$mcV$sp(KinesisWriter.scala:38) | |
at org.apache.spark.sql.kinesis.KinesisWriter$$anonfun$write$1.apply(KinesisWriter.scala:38) | |
at org.apache.spark.sql.kinesis.KinesisWriter$$anonfun$write$1.apply(KinesisWriter.scala:38) | |
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) | |
at org.apache.spark.sql.kinesis.KinesisWriter$.write(KinesisWriter.scala:37) | |
at org.apache.spark.sql.kinesis.KinesisSink.addBatch(KinesisSink.scala:38) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$5$$anonfun$apply$17.apply(MicroBatchExecution.scala:537) | |
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$5.apply(MicroBatchExecution.scala:535) | |
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:351) | |
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch(MicroBatchExecution.scala:534) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply$mcV$sp(MicroBatchExecution.scala:198) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:166) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:166) | |
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:351) | |
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1.apply$mcZ$sp(MicroBatchExecution.scala:166) | |
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:56) | |
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:160) | |
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:281) | |
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:193) | |
Caused by: java.lang.NullPointerException | |
at java.util.concurrent.ConcurrentHashMap.putVal(ConcurrentHashMap.java:1011) | |
at java.util.concurrent.ConcurrentHashMap.putAll(ConcurrentHashMap.java:1084) | |
at org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider.getStore(HDFSBackedStateStoreProvider.scala:204) | |
at org.apache.spark.sql.execution.streaming.state.StateStore$.get(StateStore.scala:371) | |
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:88) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:123) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
at java.lang.Thread.run(Thread.java:748) | |
Exception in thread "main" org.apache.spark.sql.streaming.StreamingQueryException: Job aborted due to stage failure: Task 2 in stage 50967.0 failed 30 times, most recent failure: Lost task 2.29 in stage 50967.0 (TID 611666, ip-10-134-0-32.us-west-2.compute.internal, executor 2): java.lang.NullPointerException | |
at java.util.concurrent.ConcurrentHashMap.putVal(ConcurrentHashMap.java:1011) | |
at java.util.concurrent.ConcurrentHashMap.putAll(ConcurrentHashMap.java:1084) | |
at org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider.getStore(HDFSBackedStateStoreProvider.scala:204) | |
at org.apache.spark.sql.execution.streaming.state.StateStore$.get(StateStore.scala:371) | |
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:88) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:123) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
at java.lang.Thread.run(Thread.java:748) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment