Created December 1, 2017 09:41
Save jerdeb/c30f65dc632fb997af289dac4d40c743 to your computer and use it in GitHub Desktop.
Spark (PySpark) File Already Exists Exception - Exception Code (https://stackoverflow.com/questions/47582853/spark-pyspark-file-already-exists-exception)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789)
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81)
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134)
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
17/12/01 08:43:08 ERROR FileFormatWriter: Job job_20171201084120_0003 aborted. | |
17/12/01 08:43:08 ERROR Executor: Exception in task 2.0 in stage 3.0 (TID 9) | |
org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289) | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328) | |
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789) | |
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81) | |
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134) | |
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
... 8 more | |
17/12/01 08:43:08 WARN TaskSetManager: Lost task 2.0 in stage 3.0 (TID 9, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289) | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328) | |
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789) | |
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81) | |
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134) | |
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
... 8 more | |
17/12/01 08:43:08 ERROR TaskSetManager: Task 2 in stage 3.0 failed 1 times; aborting job | |
17/12/01 08:43:08 ERROR FileFormatWriter: Aborting job null. | |
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 3.0 failed 1 times, most recent failure: Lost task 2.0 in stage 3.0 (TID 9, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289) | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328) | |
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789) | |
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81) | |
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134) | |
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
... 8 more | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173) | |
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) | |
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92) | |
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438) | |
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474) | |
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) | |
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92) | |
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217) | |
at org.apache.spark.sql.DataFrameWriter.text(DataFrameWriter.scala:555) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:280) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:214) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
... 1 more | |
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289) | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328) | |
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789) | |
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81) | |
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134) | |
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
... 8 more | |
17/12/01 08:43:08 ERROR Utils: Aborting task | |
org.apache.spark.TaskKilledException | |
at org.apache.spark.TaskContextImpl.killTaskIfInterrupted(TaskContextImpl.scala:150) | |
at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortedIterator.loadNext(UnsafeInMemorySorter.java:294) | |
at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter$SpillableIterator.loadNext(UnsafeExternalSorter.java:567) | |
at org.apache.spark.sql.execution.UnsafeExternalRowSorter$1.next(UnsafeExternalRowSorter.java:161) | |
at org.apache.spark.sql.execution.UnsafeExternalRowSorter$1.next(UnsafeExternalRowSorter.java:148) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
17/12/01 08:43:08 ERROR Utils: Aborting task | |
org.apache.spark.TaskKilledException | |
at org.apache.spark.TaskContextImpl.killTaskIfInterrupted(TaskContextImpl.scala:150) | |
at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortedIterator.loadNext(UnsafeInMemorySorter.java:294) | |
at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter$SpillableIterator.loadNext(UnsafeExternalSorter.java:567) | |
at org.apache.spark.sql.execution.UnsafeExternalRowSorter$1.next(UnsafeExternalRowSorter.java:161) | |
at org.apache.spark.sql.execution.UnsafeExternalRowSorter$1.next(UnsafeExternalRowSorter.java:148) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
17/12/01 08:43:08 ERROR Utils: Aborting task | |
org.apache.spark.TaskKilledException | |
at org.apache.spark.TaskContextImpl.killTaskIfInterrupted(TaskContextImpl.scala:150) | |
at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortedIterator.loadNext(UnsafeInMemorySorter.java:294) | |
at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter$SpillableIterator.loadNext(UnsafeExternalSorter.java:567) | |
at org.apache.spark.sql.execution.UnsafeExternalRowSorter$1.next(UnsafeExternalRowSorter.java:161) | |
at org.apache.spark.sql.execution.UnsafeExternalRowSorter$1.next(UnsafeExternalRowSorter.java:148) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
17/12/01 08:43:08 WARN TaskSetManager: Lost task 4.0 in stage 3.0 (TID 11, localhost, executor driver): TaskKilled (stage cancelled) | |
17/12/01 08:43:09 WARN PythonRunner: Incomplete task interrupted: Attempting to kill Python Worker | |
17/12/01 08:43:09 WARN PythonRunner: Incomplete task interrupted: Attempting to kill Python Worker | |
17/12/01 08:43:09 WARN PythonRunner: Incomplete task interrupted: Attempting to kill Python Worker | |
17/12/01 08:43:09 ERROR FileFormatWriter: Job job_20171201084120_0003 aborted. | |
17/12/01 08:43:09 WARN TaskSetManager: Lost task 0.0 in stage 3.0 (TID 7, localhost, executor driver): TaskKilled (stage cancelled) | |
17/12/01 08:43:09 ERROR FileFormatWriter: Job job_20171201084120_0003 aborted. | |
17/12/01 08:43:09 WARN TaskSetManager: Lost task 1.0 in stage 3.0 (TID 8, localhost, executor driver): TaskKilled (stage cancelled) | |
17/12/01 08:43:09 ERROR FileFormatWriter: Job job_20171201084120_0003 aborted. | |
17/12/01 08:43:09 WARN TaskSetManager: Lost task 3.0 in stage 3.0 (TID 10, localhost, executor driver): TaskKilled (stage cancelled) | |
Traceback (most recent call last): | |
File "<stdin>", line 1, in <module> | |
File "/Users/jeremy/DevTools/spark-2.2.0-bin-hadoop2.7/python/pyspark/sql/readwriter.py", line 706, in text | |
self._jwrite.text(path) | |
File "/Library/Python/2.7/site-packages/py4j/java_gateway.py", line 1133, in __call__ | |
answer, self.gateway_client, self.target_id, self.name) | |
File "/Users/jeremy/DevTools/spark-2.2.0-bin-hadoop2.7/python/pyspark/sql/utils.py", line 63, in deco | |
return f(*a, **kw) | |
File "/Library/Python/2.7/site-packages/py4j/protocol.py", line 319, in get_return_value | |
format(target_id, ".", name), value) | |
py4j.protocol.Py4JJavaError: An error occurred while calling o69.text. | |
: org.apache.spark.SparkException: Job aborted. | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:215) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173) | |
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) | |
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92) | |
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438) | |
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474) | |
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) | |
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) | |
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92) | |
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217) | |
at org.apache.spark.sql.DataFrameWriter.text(DataFrameWriter.scala:555) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:280) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:214) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 3.0 failed 1 times, most recent failure: Lost task 2.0 in stage 3.0 (TID 9, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:748) | |
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289) | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328) | |
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789) | |
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81) | |
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134) | |
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
... 8 more | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188) | |
... 45 more | |
Caused by: org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) | |
at org.apache.spark.scheduler.Task.run(Task.scala:108) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
... 1 more | |
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/Users/jeremy/Desktop/so/_temporary/0/_temporary/attempt_20171201084120_0003_m_000002_0/context=<http%3A%2F%2Fdig.csail.mit.edu%2F2007%2Fwiki%2Ftabulator>/part-00002-7adb363e-a136-4666-94d6-dba0f6a3dfd2.c000.txt | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:289) | |
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328) | |
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461) | |
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892) | |
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789) | |
at org.apache.spark.sql.execution.datasources.CodecStreams$.createOutputStream(CodecStreams.scala:81) | |
at org.apache.spark.sql.execution.datasources.text.TextOutputWriter.<init>(TextFileFormat.scala:134) | |
at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anon$1.newInstance(TextFileFormat.scala:80) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.org$apache$spark$sql$execution$datasources$FileFormatWriter$DynamicPartitionWriteTask$$newOutputWriter(FileFormatWriter.scala:418) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:451) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask$$anonfun$execute$2.apply(FileFormatWriter.scala:440) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:893) | |
at org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator.foreach(AbstractScalaRowIterator.scala:26) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$DynamicPartitionWriteTask.execute(FileFormatWriter.scala:440) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261) | |
... 8 more |
Was anyone able to fix this issue? I am still having this problem and can't figure out why it's happening.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Did you ever get to the bottom of why this was happening? Running into the same problem and am at a loss as to what the reason could be.