@ianmilligan1
Created June 20, 2016 13:26
[Stage 0:> (0 + 16) / 3347]
ERROR Executor - Exception in task 12.0 in stage 0.0 (TID 12)
java.lang.NegativeArraySizeException
at org.warcbase.data.WarcRecordUtils.copyStream(WarcRecordUtils.java:125)
at org.warcbase.data.WarcRecordUtils.getContent(WarcRecordUtils.java:98)
at org.warcbase.spark.archive.io.GenericArchiveRecord.<init>(GenericArchiveRecord.scala:48)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:209)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
WARN TaskSetManager - Lost task 12.0 in stage 0.0 (TID 12, localhost): java.lang.NegativeArraySizeException
at org.warcbase.data.WarcRecordUtils.copyStream(WarcRecordUtils.java:125)
at org.warcbase.data.WarcRecordUtils.getContent(WarcRecordUtils.java:98)
at org.warcbase.spark.archive.io.GenericArchiveRecord.<init>(GenericArchiveRecord.scala:48)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:209)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
ERROR TaskSetManager - Task 12 in stage 0.0 failed 1 times; aborting job
WARN TaskSetManager - Lost task 11.0 in stage 0.0 (TID 11, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 15.0 in stage 0.0 (TID 15, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 7.0 in stage 0.0 (TID 7, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 16.0 in stage 0.0 (TID 16, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 5.0 in stage 0.0 (TID 5, localhost): TaskKilled (killed intentionally)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 0.0 failed 1 times, most recent failure: Lost task 12.0 in stage 0.0 (TID 12, localhost): java.lang.NegativeArraySizeException
at org.warcbase.data.WarcRecordUtils.copyStream(WarcRecordUtils.java:125)
at org.warcbase.data.WarcRecordUtils.getContent(WarcRecordUtils.java:98)
at org.warcbase.spark.archive.io.GenericArchiveRecord.<init>(GenericArchiveRecord.scala:48)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:209)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1835)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1848)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:905)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.collect(RDD.scala:904)
at org.apache.spark.RangePartitioner$.sketch(Partitioner.scala:264)
at org.apache.spark.RangePartitioner.<init>(Partitioner.scala:126)
at org.apache.spark.rdd.OrderedRDDFunctions$$anonfun$sortByKey$1.apply(OrderedRDDFunctions.scala:62)
at org.apache.spark.rdd.OrderedRDDFunctions$$anonfun$sortByKey$1.apply(OrderedRDDFunctions.scala:61)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.OrderedRDDFunctions.sortByKey(OrderedRDDFunctions.scala:61)
at org.apache.spark.rdd.RDD$$anonfun$sortBy$1.apply(RDD.scala:547)
at org.apache.spark.rdd.RDD$$anonfun$sortBy$1.apply(RDD.scala:548)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.sortBy(RDD.scala:545)
at org.warcbase.spark.rdd.RecordRDD$CountableRDD.countItems(RecordRDD.scala:40)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:68)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:70)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:72)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:74)
at $iwC$$iwC$$iwC.<init>(<console>:76)
at $iwC$$iwC.<init>(<console>:78)
at $iwC.<init>(<console>:80)
at <init>(<console>:82)
at .<init>(<console>:86)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$pasteCommand(SparkILoop.scala:825)
at org.apache.spark.repl.SparkILoop$$anonfun$standardCommands$8.apply(SparkILoop.scala:345)
at org.apache.spark.repl.SparkILoop$$anonfun$standardCommands$8.apply(SparkILoop.scala:345)
at scala.tools.nsc.interpreter.LoopCommands$LoopCommand$$anonfun$nullary$1.apply(LoopCommands.scala:65)
at scala.tools.nsc.interpreter.LoopCommands$LoopCommand$$anonfun$nullary$1.apply(LoopCommands.scala:65)
at scala.tools.nsc.interpreter.LoopCommands$NullaryCmd.apply(LoopCommands.scala:76)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:809)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NegativeArraySizeException
at org.warcbase.data.WarcRecordUtils.copyStream(WarcRecordUtils.java:125)
at org.warcbase.data.WarcRecordUtils.getContent(WarcRecordUtils.java:98)
at org.warcbase.spark.archive.io.GenericArchiveRecord.<init>(GenericArchiveRecord.scala:48)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at org.warcbase.spark.matchbox.RecordLoader$$anonfun$loadArchives$2.apply(RecordLoader.scala:45)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:209)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
scala>
WARN TaskSetManager - Lost task 0.0 in stage 0.0 (TID 0, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 8.0 in stage 0.0 (TID 8, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 14.0 in stage 0.0 (TID 14, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 6.0 in stage 0.0 (TID 6, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 10.0 in stage 0.0 (TID 10, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 4.0 in stage 0.0 (TID 4, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 1.0 in stage 0.0 (TID 1, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 3.0 in stage 0.0 (TID 3, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 9.0 in stage 0.0 (TID 9, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 13.0 in stage 0.0 (TID 13, localhost): TaskKilled (killed intentionally)
WARN TaskSetManager - Lost task 2.0 in stage 0.0 (TID 2, localhost): TaskKilled (killed intentionally)
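
For reference, the job above was launched from spark-shell via :paste (see the pasteCommand frames in the driver stack trace) and dies in stage 0 while countItems sorts its results. A minimal warcbase script that exercises the same code path (RecordLoader.loadArchives followed by countItems) looks roughly like the sketch below; the archive path is a placeholder, and the keepValidPages/ExtractDomain steps are illustrative rather than taken from this gist.

import org.warcbase.spark.matchbox._
import org.warcbase.spark.rdd.RecordRDD._

// Hypothetical input path; substitute the ARC/WARC directory that was actually loaded.
val counts = RecordLoader.loadArchives("/path/to/warcs/", sc)
  .keepValidPages()
  .map(r => ExtractDomain(r.getUrl))
  .countItems()
  .take(10)

The NegativeArraySizeException is raised inside WarcRecordUtils.copyStream while GenericArchiveRecord reads a record body, so one likely cause is a malformed, truncated, or oversized record whose declared content length goes negative when a byte buffer of that size is allocated, rather than a problem in the script itself.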