Created
July 4, 2019 09:52
-
-
Save ianmilligan1/a3e23eed198bc95ae1569f78a704c60d to your computer and use it in GitHub Desktop.
Language Fail
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scala> :paste | |
// Entering paste mode (ctrl-D to finish) | |
import io.archivesunleashed._ | |
import io.archivesunleashed.matchbox._ | |
RecordLoader.loadArchives("example.arc.gz", sc) | |
.keepValidPages() | |
.keepDomains(Set("www.archive.org")) | |
.keepLanguages(Set("fr")) | |
.map(r => (r.getCrawlDate, r.getDomain, r.getUrl, RemoveHTML(r.getContentString))) | |
.saveAsTextFile("plain-text-fr/") | |
// Exiting paste mode, now interpreting. | |
19/07/04 11:48:54 ERROR Utils: Aborting task | |
java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
19/07/04 11:48:54 ERROR SparkHadoopWriter: Task attempt_20190704114854_0072_m_000000_0 aborted. | |
19/07/04 11:48:54 ERROR Executor: Exception in task 0.0 in stage 7.0 (TID 7) | |
org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
... 10 more | |
19/07/04 11:48:54 WARN TaskSetManager: Lost task 0.0 in stage 7.0 (TID 7, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
... 10 more | |
19/07/04 11:48:54 ERROR TaskSetManager: Task 0 in stage 7.0 failed 1 times; aborting job | |
19/07/04 11:48:54 ERROR SparkHadoopWriter: Aborting job job_20190704114854_0072. | |
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 7.0 failed 1 times, most recent failure: Lost task 0.0 in stage 7.0 (TID 7, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
... 10 more | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1499) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1478) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1478) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1478) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:70) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:77) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:79) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:81) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:83) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:85) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:87) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:89) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:91) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:93) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:95) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:97) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:99) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:101) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:103) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:105) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:107) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:109) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:111) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:113) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:115) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:117) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:119) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:121) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:123) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:125) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:127) | |
at $line29.$read$$iw$$iw$$iw$$iw$$iw.<init>(<pastie>:129) | |
at $line29.$read$$iw$$iw$$iw$$iw.<init>(<pastie>:131) | |
at $line29.$read$$iw$$iw$$iw.<init>(<pastie>:133) | |
at $line29.$read$$iw$$iw.<init>(<pastie>:135) | |
at $line29.$read$$iw.<init>(<pastie>:137) | |
at $line29.$read.<init>(<pastie>:139) | |
at $line29.$read$.<init>(<pastie>:143) | |
at $line29.$read$.<clinit>(<pastie>) | |
at $line29.$eval$.$print$lzycompute(<pastie>:7) | |
at $line29.$eval$.$print(<pastie>:6) | |
at $line29.$eval.$print(<pastie>) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793) | |
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054) | |
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645) | |
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644) | |
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31) | |
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19) | |
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644) | |
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576) | |
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572) | |
at scala.tools.nsc.interpreter.ILoop$$anonfun$20.apply(ILoop.scala:762) | |
at scala.tools.nsc.interpreter.ILoop$$anonfun$20.apply(ILoop.scala:762) | |
at scala.tools.nsc.interpreter.IMain.withLabel(IMain.scala:116) | |
at scala.tools.nsc.interpreter.ILoop.interpretCode$1(ILoop.scala:762) | |
at scala.tools.nsc.interpreter.ILoop.pasteCommand(ILoop.scala:776) | |
at scala.tools.nsc.interpreter.ILoop$$anonfun$standardCommands$9.apply(ILoop.scala:217) | |
at scala.tools.nsc.interpreter.ILoop$$anonfun$standardCommands$9.apply(ILoop.scala:217) | |
at scala.tools.nsc.interpreter.LoopCommands$LineCmd.apply(LoopCommands.scala:62) | |
at scala.tools.nsc.interpreter.ILoop.colonCommand(ILoop.scala:698) | |
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:689) | |
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:404) | |
at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:425) | |
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:285) | |
at org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159) | |
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182) | |
at org.apache.spark.repl.Main$.doMain(Main.scala:78) | |
at org.apache.spark.repl.Main$.main(Main.scala:58) | |
at org.apache.spark.repl.Main.main(Main.scala) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) | |
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849) | |
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167) | |
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195) | |
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) | |
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
Caused by: org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
... 10 more | |
org.apache.spark.SparkException: Job aborted. | |
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:100) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1499) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1478) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1478) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) | |
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1478) | |
... 81 elided | |
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 7.0 failed 1 times, most recent failure: Lost task 0.0 in stage 7.0 (TID 7, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
... 10 more | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926) | |
at scala.Option.foreach(Option.scala:257) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78) | |
... 109 more | |
Caused by: org.apache.spark.SparkException: Task failed while writing rows | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:121) | |
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.tika.langdetect.OptimaizeLangDetector | |
at io.archivesunleashed.matchbox.DetectLanguage$.apply(DetectLanguage.scala:35) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at io.archivesunleashed.package$WARecordRDD$$anonfun$keepLanguages$1.apply(package.scala:245) | |
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:128) | |
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) | |
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394) | |
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) | |
... 10 more | |
scala> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment