@sujitpal
Created February 18, 2022 20:12
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 4 times, most recent failure: Lost task 0.3 in stage 12.0 (TID 31) (10.0.230.73 executor 0): com.databricks.sql.io.FileReadException: Error while reading file dbfs:/mnt/els/labs/projects/sparknlp-cm3-test/inputs/sparknlp-input.tsv.
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:396)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:369)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$hasNext$1(FileScanRDD.scala:301)
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:296)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:758)
at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:155)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:153)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:122)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:93)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:824)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1621)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:827)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:683)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: javax.net.ssl.SSLException: Connection reset; Request ID: 7K3FPMC72ER8N3XB, Extended Request ID: juXj2gH6UpuracEt2p3UjLm8SBK5sW0wEAEtWEiTSigyFYvu9s4he2FTGKt5YkJre430tAgrjuo=, Cloud Provider: AWS, Instance ID: i-058cb792d755ca2dd
at sun.security.ssl.Alert.createSSLException(Alert.java:127)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:348)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:291)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:286)
at sun.security.ssl.SSLTransport.decode(SSLTransport.java:142)
at sun.security.ssl.SSLSocketImpl.decode(SSLSocketImpl.java:1421)
at sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1386)
at sun.security.ssl.SSLSocketImpl.access$300(SSLSocketImpl.java:73)
at sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:973)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at org.apache.http.impl.io.SessionInputBufferImpl.read(SessionInputBufferImpl.java:197)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:176)
at org.apache.http.conn.EofSensorInputStream.read(EofSensorInputStream.java:135)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.util.LengthCheckInputStream.read(LengthCheckInputStream.java:107)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.services.s3.internal.S3AbortableInputStream.read(S3AbortableInputStream.java:125)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.databricks.s3a.aws.proxy.DatabricksS3ObjectInputStream.read(DatabricksS3ProxyUtils.scala:158)
at com.databricks.s3a.S3AInputStream.read(S3AInputStream.java:247)
at com.databricks.s3a.DatabricksS3AInputStream.super$read(DatabricksStreamUtils.scala:45)
at com.databricks.s3a.DatabricksS3AInputStream.$anonfun$read$2(DatabricksStreamUtils.scala:45)
at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at com.databricks.s3a.S3AExceptionUtils$.convertAWSExceptionToJavaIOException(DatabricksStreamUtils.scala:66)
at com.databricks.s3a.DatabricksS3AInputStream.read(DatabricksStreamUtils.scala:45)
at java.io.DataInputStream.read(DataInputStream.java:149)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$3(FileSystemWithMetrics.scala:218)
at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeMetric(FileSystemWithMetrics.scala:155)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesMetric(FileSystemWithMetrics.scala:175)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.$anonfun$withTimeAndBytesReadMetric$1(FileSystemWithMetrics.scala:189)
at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at com.databricks.spark.metrics.SamplerWithPeriod.sample(FileSystemWithMetrics.scala:82)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:189)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric$(FileSystemWithMetrics.scala:188)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:196)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:218)
at java.io.DataInputStream.read(DataInputStream.java:149)
at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:227)
at org.apache.hadoop.util.LineReader.readLine(LineReader.java:185)
at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.nextKeyValue(LineRecordReader.java:200)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:41)
at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.hasNext(HadoopFileLinesReader.scala:69)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:334)
... 29 more
Suppressed: java.net.SocketException: Broken pipe (Write failed)
at java.net.SocketOutputStream.socketWrite0(Native Method)
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:111)
at java.net.SocketOutputStream.write(SocketOutputStream.java:155)
at sun.security.ssl.SSLSocketOutputRecord.encodeAlert(SSLSocketOutputRecord.java:81)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:379)
... 85 more
Caused by: java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(SocketInputStream.java:210)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:476)
at sun.security.ssl.SSLSocketInputRecord.readFully(SSLSocketInputRecord.java:459)
at sun.security.ssl.SSLSocketInputRecord.decodeInputRecord(SSLSocketInputRecord.java:243)
at sun.security.ssl.SSLSocketInputRecord.decode(SSLSocketInputRecord.java:181)
at sun.security.ssl.SSLTransport.decode(SSLTransport.java:110)
... 82 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2973)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2920)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2914)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2914)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1334)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1334)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1334)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3182)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3123)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3111)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1096)
at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2494)
at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:266)
at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:276)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:81)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:87)
at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:75)
at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:62)
at org.apache.spark.sql.execution.ResultCacheManager.collectResult$1(ResultCacheManager.scala:543)
at org.apache.spark.sql.execution.ResultCacheManager.computeResult(ResultCacheManager.scala:552)
at org.apache.spark.sql.execution.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:498)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:497)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:394)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:373)
at org.apache.spark.sql.execution.SparkPlan.executeCollectResult(SparkPlan.scala:408)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3102)
at org.apache.spark.sql.Dataset.$anonfun$collectResult$1(Dataset.scala:3093)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3900)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:172)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:319)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:146)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:854)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:113)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:269)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3898)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3092)
at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation0(OutputAggregator.scala:268)
at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation(OutputAggregator.scala:102)
at com.databricks.backend.daemon.driver.PythonDriverLocalBase.generateTableResult(PythonDriverLocalBase.scala:586)
at com.databricks.backend.daemon.driver.PythonDriverLocal.computeListResultsItem(PythonDriverLocal.scala:619)
at com.databricks.backend.daemon.driver.PythonDriverLocalBase.genListResults(PythonDriverLocalBase.scala:493)
at com.databricks.backend.daemon.driver.PythonDriverLocal.$anonfun$getResultBufferInternal$1(PythonDriverLocal.scala:674)
at com.databricks.backend.daemon.driver.PythonDriverLocal.withInterpLock(PythonDriverLocal.scala:555)
at com.databricks.backend.daemon.driver.PythonDriverLocal.getResultBufferInternal(PythonDriverLocal.scala:634)
at com.databricks.backend.daemon.driver.DriverLocal.getResultBuffer(DriverLocal.scala:658)
at com.databricks.backend.daemon.driver.PythonDriverLocal.outputSuccess(PythonDriverLocal.scala:597)
at com.databricks.backend.daemon.driver.PythonDriverLocal.$anonfun$repl$6(PythonDriverLocal.scala:222)
at com.databricks.backend.daemon.driver.PythonDriverLocal.withInterpLock(PythonDriverLocal.scala:555)
at com.databricks.backend.daemon.driver.PythonDriverLocal.repl(PythonDriverLocal.scala:209)
at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$11(DriverLocal.scala:549)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:215)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:213)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:210)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:50)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:251)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:243)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:50)
at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:526)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:693)
at scala.util.Try$.apply(Try.scala:213)
at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:685)
at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:526)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:638)
at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:431)
at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:374)
at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:225)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.databricks.sql.io.FileReadException: Error while reading file dbfs:/mnt/els/labs/projects/sparknlp-cm3-test/inputs/sparknlp-input.tsv.
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:396)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:369)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$hasNext$1(FileScanRDD.scala:301)
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:296)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:758)
at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:155)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:153)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:122)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:93)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:824)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1621)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:827)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:683)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
Caused by: javax.net.ssl.SSLException: Connection reset; Request ID: 7K3FPMC72ER8N3XB, Extended Request ID: juXj2gH6UpuracEt2p3UjLm8SBK5sW0wEAEtWEiTSigyFYvu9s4he2FTGKt5YkJre430tAgrjuo=, Cloud Provider: AWS, Instance ID: i-058cb792d755ca2dd
at sun.security.ssl.Alert.createSSLException(Alert.java:127)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:348)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:291)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:286)
at sun.security.ssl.SSLTransport.decode(SSLTransport.java:142)
at sun.security.ssl.SSLSocketImpl.decode(SSLSocketImpl.java:1421)
at sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1386)
at sun.security.ssl.SSLSocketImpl.access$300(SSLSocketImpl.java:73)
at sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:973)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at org.apache.http.impl.io.SessionInputBufferImpl.read(SessionInputBufferImpl.java:197)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:176)
at org.apache.http.conn.EofSensorInputStream.read(EofSensorInputStream.java:135)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.event.ProgressInputStream.read(ProgressInputStream.java:180)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.util.LengthCheckInputStream.read(LengthCheckInputStream.java:107)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.services.s3.internal.S3AbortableInputStream.read(S3AbortableInputStream.java:125)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
at com.databricks.s3a.aws.proxy.DatabricksS3ObjectInputStream.read(DatabricksS3ProxyUtils.scala:158)
at com.databricks.s3a.S3AInputStream.read(S3AInputStream.java:247)
at com.databricks.s3a.DatabricksS3AInputStream.super$read(DatabricksStreamUtils.scala:45)
at com.databricks.s3a.DatabricksS3AInputStream.$anonfun$read$2(DatabricksStreamUtils.scala:45)
at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at com.databricks.s3a.S3AExceptionUtils$.convertAWSExceptionToJavaIOException(DatabricksStreamUtils.scala:66)
at com.databricks.s3a.DatabricksS3AInputStream.read(DatabricksStreamUtils.scala:45)
at java.io.DataInputStream.read(DataInputStream.java:149)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$3(FileSystemWithMetrics.scala:218)
at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeMetric(FileSystemWithMetrics.scala:155)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesMetric(FileSystemWithMetrics.scala:175)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.$anonfun$withTimeAndBytesReadMetric$1(FileSystemWithMetrics.scala:189)
at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at com.databricks.spark.metrics.SamplerWithPeriod.sample(FileSystemWithMetrics.scala:82)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:189)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric$(FileSystemWithMetrics.scala:188)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:196)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:218)
at java.io.DataInputStream.read(DataInputStream.java:149)
at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:227)
at org.apache.hadoop.util.LineReader.readLine(LineReader.java:185)
at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.nextKeyValue(LineRecordReader.java:200)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:41)
at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.hasNext(HadoopFileLinesReader.scala:69)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:334)
... 29 more
Suppressed: java.net.SocketException: Broken pipe (Write failed)
at java.net.SocketOutputStream.socketWrite0(Native Method)
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:111)
at java.net.SocketOutputStream.write(SocketOutputStream.java:155)
at sun.security.ssl.SSLSocketOutputRecord.encodeAlert(SSLSocketOutputRecord.java:81)
at sun.security.ssl.TransportContext.fatal(TransportContext.java:379)
... 85 more
Caused by: java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(SocketInputStream.java:210)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:476)
at sun.security.ssl.SSLSocketInputRecord.readFully(SSLSocketInputRecord.java:459)
at sun.security.ssl.SSLSocketInputRecord.decodeInputRecord(SSLSocketInputRecord.java:243)
at sun.security.ssl.SSLSocketInputRecord.decode(SSLSocketInputRecord.java:181)
at sun.security.ssl.SSLTransport.decode(SSLTransport.java:110)
... 82 more
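
For context, the driver-side frames above (PythonDriverLocal, OutputAggregator, Dataset.collectResult) indicate the failure surfaced while a PySpark notebook was materializing a DataFrame read from the TSV on a dbfs mount backed by S3. Below is a minimal sketch of the kind of read that produces this trace; the separator and header options and the display() call are assumptions inferred from the frames, not taken from the original notebook.

    # Hypothetical reproduction sketch (PySpark on Databricks); read options
    # and display() are assumptions, not from the original gist.
    df = (
        spark.read
        .option("sep", "\t")
        .option("header", "true")
        .csv("dbfs:/mnt/els/labs/projects/sparknlp-cm3-test/inputs/sparknlp-input.tsv")
    )
    # Materializing rows on the driver triggers the FileScanRDD scan of the
    # S3-backed mount; the SSLException ("Connection reset") is raised by the
    # AWS SDK input stream while an executor is pulling bytes for this scan.
    display(df)

The root cause chain (SSLException -> SocketException: Connection reset, with a suppressed broken pipe on the TLS alert) points at the S3 connection being dropped mid-read rather than at the file contents, which is why the task fails identically on all four attempts.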