Created
January 14, 2019 10:31
-
-
Save afumagallireply/02d4c1355bc64a9d2129cdd6d0e9d9f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<command-361418500356831> in <module>()
      6     return df[df['first']=="9"]
      7
----> 8 return_df = df.groupby("second").apply(filter_pandas).count()
      9 print(return_df)
/databricks/spark/python/pyspark/sql/dataframe.py in count(self)
    455         2
    456         """
--> 457         return int(self._jdf.count())
    458
    459     @ignore_unicode_prefix
/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258
   1259         for temp_arg in temp_args:
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()
/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(
Py4JJavaError: An error occurred while calling o256.count.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 18 in stage 83994.0 failed 4 times, most recent failure: Lost task 18.3 in stage 83994.0 (TID 2778705, 10.139.64.24, executor 11): org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:352)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:341)
	at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
	at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:180)
	at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:124)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:271)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.agg_doAggregateWithoutKey_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:620)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
	at org.apache.spark.scheduler.Task.run(Task.scala:112)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:384)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.EOFException
	at java.io.DataInputStream.readInt(DataInputStream.java:392)
	at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:161)
	... 18 more
Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1747)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1735)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1734)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1734)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:962)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:962)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:962)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1970)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1918)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1906)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2141)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2238)
	at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:212)
	at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:247)
	at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:64)
	at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:70)
	at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:497)
	at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:469)
	at org.apache.spark.sql.execution.SparkPlan.executeCollectResult(SparkPlan.scala:319)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:295)
	at org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2810)
	at org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2809)
	at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3334)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:89)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:175)
	at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:84)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:126)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3333)
	at org.apache.spark.sql.Dataset.count(Dataset.scala:2809)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
	at py4j.Gateway.invoke(Gateway.java:295)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:251)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:352)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:341)
	at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
	at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:180)
	at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:124)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:271)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.agg_doAggregateWithoutKey_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:620)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
	at org.apache.spark.scheduler.Task.run(Task.scala:112)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:384)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more
Caused by: java.io.EOFException
	at java.io.DataInputStream.readInt(DataInputStream.java:392)
	at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:161)
	... 18 more
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment