@zlobober
Created December 11, 2023 10:43
"java.lang.IllegalStateException: Listing dynamic tables on executors is not supported"
2023-12-11 09:12:30,530 - INFO - root - create_session
2023-12-11 09:12:35,285 - INFO - spyt.client - SPYT Cluster version: 1.72.0
2023-12-11 09:12:35,285 - INFO - spyt.client - SPYT library version: 1.72.0
2023-12-11 09:12:35,340 - INFO - spyt.client - SHS link: http://eu-north1-c-2ct2-30b.yt.infra.nemax.nebiuscloud.net:27011/history/app-20231211091233-1756/jobs/
2023-12-11 09:12:35,344 - INFO - root - Read data: //home/dwh/data/raw/yds/billing/billing_records/1d/billing-billing-enricher-var-output
2023-12-11 09:12:42,365 - ERROR - spyt.client - Shutdown SparkSession after exception: An error occurred while calling o205.load.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 29 in stage 0.0 failed 4 times, most recent failure: Lost task 29.3 in stage 0.0 (TID 76) (eu-north1-c-2ct2-5a.yt.infra.nemax.nebiuscloud.net executor 1): java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
at scala.collection.immutable.Stream.map(Stream.scala:418)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:271)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:391)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$1(YtInMemoryFileIndex.scala:226)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:218)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.listLeafFiles(YtInMemoryFileIndex.scala:120)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.refresh0(YtInMemoryFileIndex.scala:82)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.<init>(YtInMemoryFileIndex.scala:47)
at org.apache.spark.sql.v2.YtTable.fileIndex$lzycompute(YtTable.scala:31)
at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:23)
at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:15)
at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:71)
at scala.Option.orElse(Option.scala:447)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:65)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:85)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:83)
at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:94)
at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:92)
at org.apache.spark.sql.v2.YtDataSourceV2.inferSchema(YtDataSourceV2.scala:13)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:81)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:233)
at scala.Option.map(Option.scala:230)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:210)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
at scala.collection.immutable.Stream.map(Stream.scala:418)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
... 1 more
Traceback (most recent call last):
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 103, in spark_session
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 15, in _create_session
main()
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/base_client.py", line 80, in main
self.read_datasets()
File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 11, in read_datasets
df = self._read_dataset(source_path=source_path)
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 24, in _read_dataset
df = self.spark.read.option("parsing_type_v3", "true").yt(source_path)
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 306, in yt
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 162, in load
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py", line 1321, in __call__
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 111, in deco
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py", line 326, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o205.load.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 29 in stage 0.0 failed 4 times, most recent failure: Lost task 29.3 in stage 0.0 (TID 76) (eu-north1-c-2ct2-5a.yt.infra.nemax.nebiuscloud.net executor 1): java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
at scala.collection.immutable.Stream.map(Stream.scala:418)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:271)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:391)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$1(YtInMemoryFileIndex.scala:226)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:218)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.listLeafFiles(YtInMemoryFileIndex.scala:120)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.refresh0(YtInMemoryFileIndex.scala:82)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.<init>(YtInMemoryFileIndex.scala:47)
at org.apache.spark.sql.v2.YtTable.fileIndex$lzycompute(YtTable.scala:31)
at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:23)
at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:15)
at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:71)
at scala.Option.orElse(Option.scala:447)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:65)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:85)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:83)
at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:94)
at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:92)
at org.apache.spark.sql.v2.YtDataSourceV2.inferSchema(YtDataSourceV2.scala:13)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:81)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:233)
at scala.Option.map(Option.scala:230)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:210)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
at scala.collection.immutable.Stream.map(Stream.scala:418)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
... 1 more
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 31, in <module>
main()
File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 27, in main
client.run()
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/base_client.py", line 89, in run
self._create_session(main=self.main)
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 10, in _create_session
with spark_session() as spark:
File "/usr/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 110, in spark_session
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 375, in stop
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 103, in spark_session
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 15, in _create_session
main()
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/base_client.py", line 80, in main
self.read_datasets()
File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 11, in read_datasets
df = self._read_dataset(source_path=source_path)
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 24, in _read_dataset
df = self.spark.read.option("parsing_type_v3", "true").yt(source_path)
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 306, in yt
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 162, in load
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py", line 1321, in __call__
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 111, in deco
File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py", line 326, in get_return_value
spyt.client.CachedPy4JError: An error occurred while calling o205.load.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 29 in stage 0.0 failed 4 times, most recent failure: Lost task 29.3 in stage 0.0 (TID 76) (eu-north1-c-2ct2-5a.yt.infra.nemax.nebiuscloud.net executor 1): java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
at scala.collection.immutable.Stream.map(Stream.scala:418)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:271)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:391)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$1(YtInMemoryFileIndex.scala:226)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:218)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.listLeafFiles(YtInMemoryFileIndex.scala:120)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.refresh0(YtInMemoryFileIndex.scala:82)
at org.apache.spark.sql.v2.YtInMemoryFileIndex.<init>(YtInMemoryFileIndex.scala:47)
at org.apache.spark.sql.v2.YtTable.fileIndex$lzycompute(YtTable.scala:31)
at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:23)
at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:15)
at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:71)
at scala.Option.orElse(Option.scala:447)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:65)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:85)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:83)
at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:94)
at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:92)
at org.apache.spark.sql.v2.YtDataSourceV2.inferSchema(YtDataSourceV2.scala:13)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:81)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:233)
at scala.Option.map(Option.scala:230)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:210)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
at scala.collection.immutable.Stream.map(Stream.scala:418)
at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
... 1 more
2023-12-11 09:12:42,847 - INFO - py4j.clientserver - Closing down clientserver connection