20/07/23 22:47:34 DEBUG scheduler.OutputCommitCoordinator: Ignoring task completion for completed stage
: An error occurred while calling o72.showString.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 10 in stage 0.0 failed 4 times, most recent failure: Lost task 10.3 in stage 0.0 (TID 33, ip-10-128-4-146.ec2.internal, executor 3): java.lang.NullPointerException
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:216)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:213)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:168)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:71)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:106)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2023)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1972)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1971)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1971)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:950)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:950)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:950)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2203)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2152)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2141)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:752)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2093)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2133)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:467)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:420)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3625)
at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2695)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2695)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2902)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:337)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:216)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:213)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:168)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:71)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:106)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
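
Note on the call path (a minimal PySpark sketch, not taken from the gist): the py4j frames at the bottom of the driver stack show Dataset.showString being invoked from Python, i.e. an ordinary df.show(), and the executor frames show the failure while computing a NewHadoopRDD partition inside a UnionRDD. The table name below is hypothetical and only illustrates the kind of call that produces this lineage.

from pyspark.sql import SparkSession

# Sketch only: application and table names are assumptions, not from the log above.
spark = SparkSession.builder.appName("show-call-path-sketch").getOrCreate()

# Reading a multi-partition table backed by Hadoop input formats yields the
# UnionRDD of NewHadoopRDD partitions visible in the executor stack trace.
df = spark.read.table("example_db.example_table")  # hypothetical source

# .show() is forwarded through py4j to Dataset.showString -> executeTake,
# which runs the ShuffleMapTask that failed with the NullPointerException.
df.show(20)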