Created
October 7, 2019 12:16
-
-
Save rasmusab/aec0b035e98ad65b608a9f5c0fc1a05d to your computer and use it in GitHub Desktop.
A long Spark error: org.apache.arrow.vector.util.OversizedAllocationException
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--------------------------------------------------------------------------- | |
Py4JJavaError Traceback (most recent call last) | |
in | |
----> 1 device_attack_result.count() | |
2 | |
3 | |
4 | |
/usr/lib/spark/python/pyspark/sql/dataframe.py in count(self) | |
520 2 | |
521 """ | |
--> 522 return int(self._jdf.count()) | |
523 | |
524 @ignore_unicode_prefix | |
/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args) | |
1255 answer = self.gateway_client.send_command(command) | |
1256 return_value = get_return_value( | |
-> 1257 answer, self.gateway_client, self.target_id, self.name) | |
1258 | |
1259 for temp_arg in temp_args: | |
/usr/lib/spark/python/pyspark/sql/utils.py in deco(*a, **kw) | |
61 def deco(*a, **kw): | |
62 try: | |
---> 63 return f(*a, **kw) | |
64 except py4j.protocol.Py4JJavaError as e: | |
65 s = e.java_exception.toString() | |
/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) | |
326 raise Py4JJavaError( | |
327 "An error occurred while calling {0}{1}{2}.\n". | |
--> 328 format(target_id, ".", name), value) | |
329 else: | |
330 raise Py4JError( | |
Py4JJavaError: An error occurred while calling o818.count. | |
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 102 in stage 27.0 failed 4 times, most recent failure: Lost task 102.3 in stage 27.0 (TID 3235, ip-172-31-111-163.ec2.internal, executor 1): org.apache.arrow.vector.util.OversizedAllocationException: Unable to expand the buffer | |
at org.apache.arrow.vector.BaseVariableWidthVector.reallocBufferHelper(BaseVariableWidthVector.java:547) | |
at org.apache.arrow.vector.BaseVariableWidthVector.reallocValidityAndOffsetBuffers(BaseVariableWidthVector.java:529) | |
at org.apache.arrow.vector.BaseVariableWidthVector.setValueCount(BaseVariableWidthVector.java:866) | |
at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.finish(ArrowWriter.scala:127) | |
at org.apache.spark.sql.execution.arrow.ArrowWriter$$anonfun$finish$1.apply(ArrowWriter.scala:94) | |
at org.apache.spark.sql.execution.arrow.ArrowWriter$$anonfun$finish$1.apply(ArrowWriter.scala:94) | |
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) | |
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) | |
at org.apache.spark.sql.execution.arrow.ArrowWriter.finish(ArrowWriter.scala:94) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply$mcV$sp(ArrowPythonRunner.scala:88) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply(ArrowPythonRunner.scala:76) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply(ArrowPythonRunner.scala:76) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2.writeIteratorToStream(ArrowPythonRunner.scala:96) | |
at org.apache.spark.api.python.BasePythonRunner$WriterThread$$anonfun$run$1.apply(PythonRunner.scala:345) | |
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1945) | |
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:194) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2041) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2029) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2028) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2028) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:966) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:966) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:966) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2262) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2211) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2200) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:777) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126) | |
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.RDD.collect(RDD.scala:944) | |
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:335) | |
at org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2830) | |
at org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2829) | |
at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364) | |
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) | |
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363) | |
at org.apache.spark.sql.Dataset.count(Dataset.scala:2829) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:282) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:238) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.arrow.vector.util.OversizedAllocationException: Unable to expand the buffer | |
at org.apache.arrow.vector.BaseVariableWidthVector.reallocBufferHelper(BaseVariableWidthVector.java:547) | |
at org.apache.arrow.vector.BaseVariableWidthVector.reallocValidityAndOffsetBuffers(BaseVariableWidthVector.java:529) | |
at org.apache.arrow.vector.BaseVariableWidthVector.setValueCount(BaseVariableWidthVector.java:866) | |
at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.finish(ArrowWriter.scala:127) | |
at org.apache.spark.sql.execution.arrow.ArrowWriter$$anonfun$finish$1.apply(ArrowWriter.scala:94) | |
at org.apache.spark.sql.execution.arrow.ArrowWriter$$anonfun$finish$1.apply(ArrowWriter.scala:94) | |
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) | |
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) | |
at org.apache.spark.sql.execution.arrow.ArrowWriter.finish(ArrowWriter.scala:94) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply$mcV$sp(ArrowPythonRunner.scala:88) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply(ArrowPythonRunner.scala:76) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply(ArrowPythonRunner.scala:76) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2.writeIteratorToStream(ArrowPythonRunner.scala:96) | |
at org.apache.spark.api.python.BasePythonRunner$WriterThread$$anonfun$run$1.apply(PythonRunner.scala:345) | |
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1945) | |
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:194) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment