Created
May 19, 2016 01:29
-
-
Save ianmilligan1/e1826508fcc150605487139d76c7cf26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
i2millig@rho:/mnt/vol1/derivative_data/walk$ spark | |
WARN NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
Welcome to | |
____ __ | |
/ __/__ ___ _____/ /__ | |
_\ \/ _ \/ _ `/ __/ '_/ | |
/___/ .__/\_,_/_/ /_/\_\ version 1.5.1 | |
/_/ | |
Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_45) | |
Type in expressions to have them evaluated. | |
Type :help for more information. | |
WARN MetricsSystem - Using default name DAGScheduler for source because spark.app.id is not set. | |
Spark context available as sc. | |
SQL context available as sqlContext. | |
scala> :paste | |
// Entering paste mode (ctrl-D to finish) | |
import org.warcbase.spark.matchbox._ | |
import org.warcbase.spark.rdd.RecordRDD._ | |
val university_of_alberta_websites = | |
RecordLoader.loadArchives("/mnt/vol1/data_sets/TEST/*.gz", sc) | |
.keepValidPages() | |
.map(r => (r.getCrawlMonth, ExtractDomain(r.getUrl))) | |
.countItems() | |
.saveAsTextFile("/mnt/vol1/derivative_data/walk/university_of_alberta_websites") | |
// Exiting paste mode, now interpreting. | |
[Stage 0:> (40 + 12) / 2771]WARN HeartbeatReceiver - Removing executor driver with no recent heartbeats: 139250 ms exceeds timeout 120000 ms | |
[Stage 0:> (40 + 12) / 2771]ERROR TaskSchedulerImpl - Lost executor driver on localhost: Executor heartbeat timed out after 139250 ms | |
[Stage 0:> (40 + 12) / 2771]WARN AkkaRpcEndpointRef - Error sending message [message = Heartbeat(driver,[Lscala.Tuple2;@3f595bc1,BlockManagerId(driver, localhost, 47904))] in 1 attempts | |
org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [120 seconds]. This timeout is controlled by spark.rpc.askTimeout | |
at org.apache.spark.rpc.RpcTimeout.org$apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcEnv.scala:214) | |
at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcEnv.scala:229) | |
at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcEnv.scala:225) | |
at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33) | |
at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcEnv.scala:242) | |
at org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:101) | |
at org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:77) | |
at org.apache.spark.executor.Executor.org$apache$spark$executor$Executor$$reportHeartBeat(Executor.scala:452) | |
at org.apache.spark.executor.Executor$$anon$1$$anonfun$run$1.apply$mcV$sp(Executor.scala:472) | |
at org.apache.spark.executor.Executor$$anon$1$$anonfun$run$1.apply(Executor.scala:472) | |
at org.apache.spark.executor.Executor$$anon$1$$anonfun$run$1.apply(Executor.scala:472) | |
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1699) | |
at org.apache.spark.executor.Executor$$anon$1.run(Executor.scala:472) | |
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) | |
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) | |
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) | |
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) | |
at java.lang.Thread.run(Thread.java:745) | |
Caused by: java.util.concurrent.TimeoutException: Futures timed out after [120 seconds] | |
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) | |
at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) | |
at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) | |
at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) | |
at scala.concurrent.Await$.result(package.scala:107) | |
at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcEnv.scala:241) | |
... 15 more | |
[Stage 0:> (40 + 11) / 2771]WARN TaskSetManager - Lost task 50.0 in stage 0.0 (TID 50, localhost): ExecutorLostFailure (executor driver lost) | |
ERROR TaskSetManager - Task 50 in stage 0.0 failed 1 times; aborting job | |
WARN TaskSetManager - Lost task 44.0 in stage 0.0 (TID 44, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 47.0 in stage 0.0 (TID 47, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 2.0 in stage 0.0 (TID 2, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 46.0 in stage 0.0 (TID 46, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 49.0 in stage 0.0 (TID 49, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 40.0 in stage 0.0 (TID 40, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 7.0 in stage 0.0 (TID 7, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 45.0 in stage 0.0 (TID 45, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 48.0 in stage 0.0 (TID 48, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 3.0 in stage 0.0 (TID 3, localhost): ExecutorLostFailure (executor driver lost) | |
WARN TaskSetManager - Lost task 51.0 in stage 0.0 (TID 51, localhost): ExecutorLostFailure (executor driver lost) | |
[Stage 0:> (40 + -29) / 2771]WARN SparkContext - Killing executors is only supported in coarse-grained mode | |
org.apache.spark.SparkException: Job aborted due to stage failure: Task 50 in stage 0.0 failed 1 times, most recent failure: Lost task 50.0 in stage 0.0 (TID 50, localhost): ExecutorLostFailure (executor driver lost) | |
Driver stacktrace: | |
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) | |
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) | |
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) | |
at scala.Option.foreach(Option.scala:236) | |
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) | |
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) | |
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) | |
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1835) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1848) | |
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919) | |
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:905) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) | |
at org.apache.spark.rdd.RDD.collect(RDD.scala:904) | |
at org.apache.spark.RangePartitioner$.sketch(Partitioner.scala:264) | |
at org.apache.spark.RangePartitioner.<init>(Partitioner.scala:126) | |
at org.apache.spark.rdd.OrderedRDDFunctions$$anonfun$sortByKey$1.apply(OrderedRDDFunctions.scala:62) | |
at org.apache.spark.rdd.OrderedRDDFunctions$$anonfun$sortByKey$1.apply(OrderedRDDFunctions.scala:61) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) | |
at org.apache.spark.rdd.OrderedRDDFunctions.sortByKey(OrderedRDDFunctions.scala:61) | |
at org.apache.spark.rdd.RDD$$anonfun$sortBy$1.apply(RDD.scala:547) | |
at org.apache.spark.rdd.RDD$$anonfun$sortBy$1.apply(RDD.scala:548) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) | |
at org.apache.spark.rdd.RDD.sortBy(RDD.scala:545) | |
at org.warcbase.spark.rdd.RecordRDD$CountableRDD.countItems(RecordRDD.scala:40) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:36) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38) | |
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40) | |
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:42) | |
at $iwC$$iwC$$iwC.<init>(<console>:44) | |
at $iwC$$iwC.<init>(<console>:46) | |
at $iwC.<init>(<console>:48) | |
at <init>(<console>:50) | |
at .<init>(<console>:54) | |
at .<clinit>(<console>) | |
at .<init>(<console>:7) | |
at .<clinit>(<console>) | |
at $print(<console>) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:497) | |
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) | |
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340) | |
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) | |
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) | |
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$pasteCommand(SparkILoop.scala:825) | |
at org.apache.spark.repl.SparkILoop$$anonfun$standardCommands$8.apply(SparkILoop.scala:345) | |
at org.apache.spark.repl.SparkILoop$$anonfun$standardCommands$8.apply(SparkILoop.scala:345) | |
at scala.tools.nsc.interpreter.LoopCommands$LoopCommand$$anonfun$nullary$1.apply(LoopCommands.scala:65) | |
at scala.tools.nsc.interpreter.LoopCommands$LoopCommand$$anonfun$nullary$1.apply(LoopCommands.scala:65) | |
at scala.tools.nsc.interpreter.LoopCommands$NullaryCmd.apply(LoopCommands.scala:76) | |
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:809) | |
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) | |
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) | |
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) | |
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059) | |
at org.apache.spark.repl.Main$.main(Main.scala:31) | |
at org.apache.spark.repl.Main.main(Main.scala) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:497) | |
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672) | |
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) | |
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
scala> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment