Created
July 10, 2019 13:48
-
-
Save ianmilligan1/7de467768cb3fb9fe548bec7294da520 to your computer and use it in GitHub Desktop.
PySpark Explosion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ianmilligan1@Ians-MacBook-Pro-3:~/dropbox/git/aut$ python ~/dropbox/git/aut/src/main/python/tf/detect.py --web_archive "/Users/ianmilligan1/dropbox/git/aut-resources/Sample-Data/ARCHIVEIT-227-UOFTORONTO-CANPOLPINT-20060622205612-00009-crawling025.archive.org.arc.gz" --aut_jar /Users/ianmilligan1/dropbox/git/aut/target/aut-0.17.1-SNAPSHOT-fatjar.jar --spark /Users/ianmilligan1/dropbox/spark-2.4.3-bin-hadoop2.7/bin --master spark://Ians-MacBook-Pro-3.local:7077 --img_model ssd --filter_size 50 50 --output_path /Users/ianmilligan1/desktop/aut-image-tf-testing | |
19/07/10 15:44:55 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties | |
Setting default log level to "WARN". | |
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). | |
height >= 50 and width >= 50 | |
[Stage 0:> (0 + 1) / 1]19/07/10 15:45:34 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, 192.168.60.14, executor 0): java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
19/07/10 15:45:57 WARN TaskSetManager: Lost task 0.1 in stage 0.0 (TID 1, 192.168.60.14, executor 0): java.net.SocketException: Broken pipe | |
at java.net.SocketOutputStream.socketWrite0(Native Method) | |
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:109) | |
at java.net.SocketOutputStream.write(SocketOutputStream.java:153) | |
at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82) | |
at java.io.BufferedOutputStream.write(BufferedOutputStream.java:126) | |
at java.io.DataOutputStream.write(DataOutputStream.java:107) | |
at java.nio.channels.Channels$WritableByteChannelImpl.write(Channels.java:458) | |
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:75) | |
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:88) | |
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:225) | |
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:198) | |
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:115) | |
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:103) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply$mcV$sp(ArrowPythonRunner.scala:89) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply(ArrowPythonRunner.scala:76) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2$$anonfun$writeIteratorToStream$1.apply(ArrowPythonRunner.scala:76) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$2.writeIteratorToStream(ArrowPythonRunner.scala:96) | |
at org.apache.spark.api.python.BasePythonRunner$WriterThread$$anonfun$run$1.apply(PythonRunner.scala:345) | |
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1945) | |
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:194) | |
[Stage 0:> (0 + 1) / 1]19/07/10 15:46:20 WARN TaskSetManager: Lost task 0.2 in stage 0.0 (TID 2, 192.168.60.14, executor 0): java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
19/07/10 15:46:44 WARN TaskSetManager: Lost task 0.3 in stage 0.0 (TID 3, 192.168.60.14, executor 0): java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
19/07/10 15:46:44 ERROR TaskSetManager: Task 0 in stage 0.0 failed 4 times; aborting job | |
19/07/10 15:46:44 ERROR FileFormatWriter: Aborting job ef18ff95-e07c-496c-91bd-d1c8f64c08a7. | |
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, 192.168.60.14, executor 0): java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:167) | |
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:159) | |
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104) | |
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102) | |
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) | |
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) | |
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) | |
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) | |
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) | |
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676) | |
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229) | |
at org.apache.spark.sql.DataFrameWriter.json(DataFrameWriter.scala:545) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:282) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:238) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
... 1 more | |
Traceback (most recent call last): | |
File "/Users/ianmilligan1/dropbox/git/aut/src/main/python/tf/detect.py", line 37, in <module> | |
res.write.json(args.output_path) | |
File "/Users/ianmilligan1/anaconda/lib/python3.5/site-packages/pyspark/sql/readwriter.py", line 812, in json | |
self._jwrite.json(path) | |
File "/Users/ianmilligan1/anaconda/lib/python3.5/site-packages/py4j/java_gateway.py", line 1257, in __call__ | |
answer, self.gateway_client, self.target_id, self.name) | |
File "/Users/ianmilligan1/anaconda/lib/python3.5/site-packages/pyspark/sql/utils.py", line 63, in deco | |
return f(*a, **kw) | |
File "/Users/ianmilligan1/anaconda/lib/python3.5/site-packages/py4j/protocol.py", line 328, in get_return_value | |
format(target_id, ".", name), value) | |
py4j.protocol.Py4JJavaError: An error occurred while calling o65.json. | |
: org.apache.spark.SparkException: Job aborted. | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:198) | |
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:159) | |
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104) | |
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102) | |
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) | |
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) | |
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) | |
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) | |
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) | |
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) | |
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) | |
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) | |
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) | |
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676) | |
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271) | |
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229) | |
at org.apache.spark.sql.DataFrameWriter.json(DataFrameWriter.scala:545) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) | |
at py4j.Gateway.invoke(Gateway.java:282) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:238) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, 192.168.60.14, executor 0): java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) | |
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:167) | |
... 33 more | |
Caused by: java.net.SocketException: Connection reset | |
at java.net.SocketInputStream.read(SocketInputStream.java:209) | |
at java.net.SocketInputStream.read(SocketInputStream.java:141) | |
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) | |
at java.io.BufferedInputStream.read(BufferedInputStream.java:265) | |
at java.io.DataInputStream.readInt(DataInputStream.java:387) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:159) | |
at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.read(ArrowPythonRunner.scala:122) | |
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) | |
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec$$anon$2.<init>(ArrowEvalPythonExec.scala:98) | |
at org.apache.spark.sql.execution.python.ArrowEvalPythonExec.evaluate(ArrowEvalPythonExec.scala:96) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:127) | |
at org.apache.spark.sql.execution.python.EvalPythonExec$$anonfun$doExecute$1.apply(EvalPythonExec.scala:89) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
... 1 more | |
ianmilligan1@Ians-MacBook-Pro-3:~/dropbox/git/aut$ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment