Last active: June 8, 2020, 02:03
Save HyukjinKwon/188a4e03c844ed843d0e2dc2f8a0bb69 to your computer and use it in GitHub Desktop.
Python execution error message
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> from pyspark.sql.functions import udf | |
>>> @udf | |
... def divide_by_zero(v): | |
... raise v / 0 | |
... | |
>>> spark.range(1).select(divide_by_zero("id")).show() | |
Traceback (most recent call last): | |
File "<stdin>", line 1, in <module> | |
File "/.../python/pyspark/sql/dataframe.py", line 427, in show | |
print(self._jdf.showString(n, 20, vertical)) | |
File "/.../python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ | |
File "/.../python/pyspark/sql/utils.py", line 98, in deco | |
return f(*a, **kw) | |
File "/.../python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value | |
py4j.protocol.Py4JJavaError: An error occurred while calling o45.showString. | |
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 10 in stage 2.0 failed 4 times, most recent failure: Lost task 10.3 in stage 2.0 (TID 18, 192.168.35.193, executor 3): org.apache.spark.api.python.PythonException: Traceback (most recent call last): | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 605, in main | |
process() | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 597, in process | |
serializer.dump_stream(out_iter, outfile) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 223, in dump_stream | |
self.serializer.dump_stream(self._batched(iterator), stream) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 141, in dump_stream | |
for obj in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 212, in _batched | |
for item in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr> | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 90, in <lambda> | |
return lambda *a: f(*a) | |
File "/.../python/lib/pyspark.zip/pyspark/util.py", line 107, in wrapper | |
return f(*args, **kwargs) | |
File "<stdin>", line 3, in divide_by_zero | |
ZeroDivisionError: division by zero | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:516) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:81) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:64) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:469) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source) | |
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) | |
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:753) | |
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:340) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:127) | |
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:469) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:472) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
at java.lang.Thread.run(Thread.java:748) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2117) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2066) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2065) | |
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) | |
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2065) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1021) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1021) | |
at scala.Option.foreach(Option.scala:407) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1021) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2297) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2246) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2235) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:823) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2108) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2129) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2148) | |
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:467) | |
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:420) | |
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47) | |
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3653) | |
at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2695) | |
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3644) | |
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) | |
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) | |
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) | |
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3642) | |
at org.apache.spark.sql.Dataset.head(Dataset.scala:2695) | |
at org.apache.spark.sql.Dataset.take(Dataset.scala:2902) | |
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300) | |
at org.apache.spark.sql.Dataset.showString(Dataset.scala:337) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:282) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:238) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last): | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 605, in main | |
process() | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 597, in process | |
serializer.dump_stream(out_iter, outfile) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 223, in dump_stream | |
self.serializer.dump_stream(self._batched(iterator), stream) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 141, in dump_stream | |
for obj in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 212, in _batched | |
for item in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr> | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 90, in <lambda> | |
return lambda *a: f(*a) | |
File "/.../python/lib/pyspark.zip/pyspark/util.py", line 107, in wrapper | |
return f(*args, **kwargs) | |
File "<stdin>", line 3, in divide_by_zero | |
ZeroDivisionError: division by zero | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:516) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:81) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:64) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:469) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source) | |
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) | |
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:753) | |
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:340) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:127) | |
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:469) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:472) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
... 1 more |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> from pyspark.sql.functions import udf | |
>>> @udf | |
... def divide_by_zero(v): | |
... raise v / 0 | |
... | |
>>> spark.range(1).select(divide_by_zero("id")).show() | |
Traceback (most recent call last): | |
File "<stdin>", line 1, in <module> | |
File "/.../python/pyspark/sql/dataframe.py", line 427, in show | |
print(self._jdf.showString(n, 20, vertical)) | |
File "/.../python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ | |
File "/.../python/pyspark/sql/utils.py", line 127, in deco | |
raise_from(converted) | |
File "<string>", line 3, in raise_from | |
pyspark.sql.utils.PythonException: | |
An exception was thrown from Python worker in the executor. The below is the Python worker stacktrace. | |
Traceback (most recent call last): | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 605, in main | |
process() | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 597, in process | |
serializer.dump_stream(out_iter, outfile) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 223, in dump_stream | |
self.serializer.dump_stream(self._batched(iterator), stream) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 141, in dump_stream | |
for obj in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 212, in _batched | |
for item in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr> | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 90, in <lambda> | |
return lambda *a: f(*a) | |
File "/.../python/lib/pyspark.zip/pyspark/util.py", line 107, in wrapper | |
return f(*args, **kwargs) | |
File "<stdin>", line 3, in divide_by_zero | |
ZeroDivisionError: division by zero |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> spark.conf.set("spark.sql.pyspark.jvmStacktrace.enabled", True) | |
>>> from pyspark.sql.functions import udf | |
>>> @udf | |
... def divide_by_zero(v): | |
... raise v / 0 | |
... | |
>>> spark.range(1).select(divide_by_zero("id")).show() | |
Traceback (most recent call last): | |
File "<stdin>", line 1, in <module> | |
File "/.../python/pyspark/sql/dataframe.py", line 427, in show | |
print(self._jdf.showString(n, 20, vertical)) | |
File "/.../python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ | |
File "/.../python/pyspark/sql/utils.py", line 131, in deco | |
raise_from(converted) | |
File "<string>", line 3, in raise_from | |
pyspark.sql.utils.PythonException: | |
An exception was thrown from Python worker in the executor. The below is the Python worker stacktrace. | |
Traceback (most recent call last): | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 605, in main | |
process() | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 597, in process | |
serializer.dump_stream(out_iter, outfile) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 223, in dump_stream | |
self.serializer.dump_stream(self._batched(iterator), stream) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 141, in dump_stream | |
for obj in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 212, in _batched | |
for item in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr> | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 90, in <lambda> | |
return lambda *a: f(*a) | |
File "/.../python/lib/pyspark.zip/pyspark/util.py", line 107, in wrapper | |
return f(*args, **kwargs) | |
File "<stdin>", line 3, in divide_by_zero | |
ZeroDivisionError: division by zero | |
JVM stacktrace: | |
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 4, 192.168.35.193, executor 1): org.apache.spark.api.python.PythonException: Traceback (most recent call last): | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 605, in main | |
process() | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 597, in process | |
serializer.dump_stream(out_iter, outfile) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 223, in dump_stream | |
self.serializer.dump_stream(self._batched(iterator), stream) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 141, in dump_stream | |
for obj in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 212, in _batched | |
for item in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr> | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 90, in <lambda> | |
return lambda *a: f(*a) | |
File "/.../python/lib/pyspark.zip/pyspark/util.py", line 107, in wrapper | |
return f(*args, **kwargs) | |
File "<stdin>", line 3, in divide_by_zero | |
ZeroDivisionError: division by zero | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:516) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:81) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:64) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:469) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source) | |
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) | |
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:753) | |
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:340) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:127) | |
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:469) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:472) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
at java.lang.Thread.run(Thread.java:748) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2117) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2066) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2065) | |
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) | |
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2065) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1021) | |
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1021) | |
at scala.Option.foreach(Option.scala:407) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1021) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2297) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2246) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2235) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:823) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2108) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2129) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2148) | |
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:467) | |
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:420) | |
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47) | |
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3653) | |
at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2695) | |
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3644) | |
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) | |
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) | |
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) | |
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3642) | |
at org.apache.spark.sql.Dataset.head(Dataset.scala:2695) | |
at org.apache.spark.sql.Dataset.take(Dataset.scala:2902) | |
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300) | |
at org.apache.spark.sql.Dataset.showString(Dataset.scala:337) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:282) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:238) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last): | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 605, in main | |
process() | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 597, in process | |
serializer.dump_stream(out_iter, outfile) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 223, in dump_stream | |
self.serializer.dump_stream(self._batched(iterator), stream) | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 141, in dump_stream | |
for obj in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/serializers.py", line 212, in _batched | |
for item in iterator: | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in mapper | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 450, in <genexpr> | |
result = tuple(f(*[a[o] for o in arg_offsets]) for (arg_offsets, f) in udfs) | |
File "/.../python/lib/pyspark.zip/pyspark/worker.py", line 90, in <lambda> | |
return lambda *a: f(*a) | |
File "/.../python/lib/pyspark.zip/pyspark/util.py", line 107, in wrapper | |
return f(*args, **kwargs) | |
File "<stdin>", line 3, in divide_by_zero | |
ZeroDivisionError: division by zero | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:516) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:81) | |
at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:64) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:469) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) | |
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source) | |
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) | |
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:753) | |
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:340) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:127) | |
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:469) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:472) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) | |
... 1 more |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.