Created December 11, 2023 10:43
"java.lang.IllegalStateException: Listing dynamic tables on executors is not supported"
2023-12-11 09:12:30,530 - INFO - root - create_session
2023-12-11 09:12:35,285 - INFO - spyt.client - SPYT Cluster version: 1.72.0
2023-12-11 09:12:35,285 - INFO - spyt.client - SPYT library version: 1.72.0
2023-12-11 09:12:35,340 - INFO - spyt.client - SHS link: http://eu-north1-c-2ct2-30b.yt.infra.nemax.nebiuscloud.net:27011/history/app-20231211091233-1756/jobs/
2023-12-11 09:12:35,344 - INFO - root - Read data: //home/dwh/data/raw/yds/billing/billing_records/1d/billing-billing-enricher-var-output
2023-12-11 09:12:42,365 - ERROR - spyt.client - Shutdown SparkSession after exception: An error occurred while calling o205.load.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 29 in stage 0.0 failed 4 times, most recent failure: Lost task 29.3 in stage 0.0 (TID 76) (eu-north1-c-2ct2-5a.yt.infra.nemax.nebiuscloud.net executor 1): java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
    at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
    at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:271)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:391)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$1(YtInMemoryFileIndex.scala:226)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.map(TraversableLike.scala:286)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
    at scala.collection.AbstractTraversable.map(Traversable.scala:108)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:218)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.listLeafFiles(YtInMemoryFileIndex.scala:120)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.refresh0(YtInMemoryFileIndex.scala:82)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.<init>(YtInMemoryFileIndex.scala:47)
    at org.apache.spark.sql.v2.YtTable.fileIndex$lzycompute(YtTable.scala:31)
    at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:23)
    at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:15)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:71)
    at scala.Option.orElse(Option.scala:447)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:65)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:85)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:83)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:94)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:92)
    at org.apache.spark.sql.v2.YtDataSourceV2.inferSchema(YtDataSourceV2.scala:13)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:81)
    at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:233)
    at scala.Option.map(Option.scala:230)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:210)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:566)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
    at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
    at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    ... 1 more
Traceback (most recent call last):
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 103, in spark_session
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 15, in _create_session
    main()
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/base_client.py", line 80, in main
    self.read_datasets()
  File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 11, in read_datasets
    df = self._read_dataset(source_path=source_path)
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 24, in _read_dataset
    df = self.spark.read.option("parsing_type_v3", "true").yt(source_path)
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 306, in yt
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 162, in load
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py", line 1321, in __call__
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 111, in deco
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py", line 326, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o205.load.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 29 in stage 0.0 failed 4 times, most recent failure: Lost task 29.3 in stage 0.0 (TID 76) (eu-north1-c-2ct2-5a.yt.infra.nemax.nebiuscloud.net executor 1): java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
    at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
    at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:271)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:391)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$1(YtInMemoryFileIndex.scala:226)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.map(TraversableLike.scala:286)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
    at scala.collection.AbstractTraversable.map(Traversable.scala:108)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:218)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.listLeafFiles(YtInMemoryFileIndex.scala:120)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.refresh0(YtInMemoryFileIndex.scala:82)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.<init>(YtInMemoryFileIndex.scala:47)
    at org.apache.spark.sql.v2.YtTable.fileIndex$lzycompute(YtTable.scala:31)
    at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:23)
    at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:15)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:71)
    at scala.Option.orElse(Option.scala:447)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:65)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:85)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:83)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:94)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:92)
    at org.apache.spark.sql.v2.YtDataSourceV2.inferSchema(YtDataSourceV2.scala:13)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:81)
    at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:233)
    at scala.Option.map(Option.scala:230)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:210)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:566)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
    at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
    at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    ... 1 more
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 31, in <module>
    main()
  File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 27, in main
    client.run()
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/base_client.py", line 89, in run
    self._create_session(main=self.main)
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 10, in _create_session
    with spark_session() as spark:
  File "/usr/lib/python3.10/contextlib.py", line 153, in __exit__
    self.gen.throw(typ, value, traceback)
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 110, in spark_session
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 375, in stop
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/spyt.zip/spyt/client.py", line 103, in spark_session
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 15, in _create_session
    main()
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/base_client.py", line 80, in main
    self.read_datasets()
  File "/yt/hdd3/slots/1/sandbox/././tmpfs/spark/work/driver-20231211091222-1756/script.py", line 11, in read_datasets
    df = self._read_dataset(source_path=source_path)
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/work/driver-20231211091222-1756/client.py", line 24, in _read_dataset
    df = self.spark.read.option("parsing_type_v3", "true").yt(source_path)
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 306, in yt
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 162, in load
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py", line 1321, in __call__
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 111, in deco
  File "/yt/hdd3/slots/1/sandbox/tmpfs/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py", line 326, in get_return_value
spyt.client.CachedPy4JError: An error occurred while calling o205.load.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 29 in stage 0.0 failed 4 times, most recent failure: Lost task 29.3 in stage 0.0 (TID 76) (eu-north1-c-2ct2-5a.yt.infra.nemax.nebiuscloud.net executor 1): java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
    at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1160)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1160)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1160)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2642)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2584)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2573)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
    at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:271)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:391)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$1(YtInMemoryFileIndex.scala:226)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.map(TraversableLike.scala:286)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
    at scala.collection.AbstractTraversable.map(Traversable.scala:108)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.bulkListLeafFiles(YtInMemoryFileIndex.scala:218)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.listLeafFiles(YtInMemoryFileIndex.scala:120)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.refresh0(YtInMemoryFileIndex.scala:82)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex.<init>(YtInMemoryFileIndex.scala:47)
    at org.apache.spark.sql.v2.YtTable.fileIndex$lzycompute(YtTable.scala:31)
    at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:23)
    at org.apache.spark.sql.v2.YtTable.fileIndex(YtTable.scala:15)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:71)
    at scala.Option.orElse(Option.scala:447)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:65)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:85)
    at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:83)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema(FileDataSourceV2.scala:94)
    at org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2.inferSchema$(FileDataSourceV2.scala:92)
    at org.apache.spark.sql.v2.YtDataSourceV2.inferSchema(YtDataSourceV2.scala:13)
    at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:81)
    at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:233)
    at scala.Option.map(Option.scala:230)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:210)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:566)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
    at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalStateException: Listing dynamic tables on executors is not supported
    at tech.ytsaurus.spyt.fs.YtTableFileSystem.listStatus(YtTableFileSystem.scala:38)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.listLeafFiles(YtInMemoryFileIndex.scala:353)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$6(YtInMemoryFileIndex.scala:268)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.v2.YtInMemoryFileIndex$.$anonfun$bulkListLeafFiles$4(YtInMemoryFileIndex.scala:260)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1463)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    ... 1 more
2023-12-11 09:12:42,847 - INFO - py4j.clientserver - Closing down clientserver connection
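
What the trace shows: schema inference (DataFrameReader.load -> inferSchema -> YtInMemoryFileIndex) distributed the leaf-file listing as a Spark job, and YtTableFileSystem.listStatus threw because at least one node under the input path is a dynamic table, which SPYT can only list on the driver. In stock Spark the file index switches to executor-side listing when the number of paths exceeds spark.sql.sources.parallelPartitionDiscovery.threshold (32 by default), so a directory with many child tables is a likely trigger; whether SPYT honors the same setting here is an assumption. Below is a defensive pre-flight check, sketched with the yt Python client: the helper name is hypothetical, while @type and @dynamic are standard YTsaurus node attributes.

import yt.wrapper as yt

def assert_no_dynamic_tables(path: str) -> None:
    # Hypothetical guard: walk the direct children of `path` on the driver and
    # refuse to proceed if any of them is a dynamic table. @dynamic is true
    # for YTsaurus dynamic tables and false for static ones.
    for child in yt.list(path, absolute=True):
        if yt.get(f"{child}/@type") == "table" and yt.get(f"{child}/@dynamic"):
            raise ValueError(
                f"{child} is a dynamic table; materialize it to a static table "
                "(or read it separately on the driver) before this SPYT read"
            )

If the distributed listing is indeed the trigger, keeping discovery on the driver (for example by raising spark.sql.sources.parallelPartitionDiscovery.threshold) may also side-step the exception, though the underlying restriction on listing dynamic tables from executors remains.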