Created
December 8, 2017 05:05
-
-
Save anonymous/614a1746162dd12bb0b810add85deda2 to your computer and use it in GitHub Desktop.
Mapping a Dataset&lt;Row&gt; into a Dataset&lt;Integer&gt;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17/12/08 09:46:47 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 4, 172.31.48.232, executor 0): java.lang.ClassCastException: cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD
at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2233)
at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1405)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2288)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2282)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:428)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
17/12/08 09:46:47 INFO TaskSetManager: Starting task 0.1 in stage 2.0 (TID 5, 172.31.48.232, executor 0, partition 0, PROCESS_LOCAL, 5430 bytes)
17/12/08 09:46:47 INFO TaskSetManager: Lost task 0.1 in stage 2.0 (TID 5) on 172.31.48.232, executor 0: java.lang.ClassCastException (cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD) [duplicate 1]
17/12/08 09:46:47 INFO TaskSetManager: Starting task 0.2 in stage 2.0 (TID 6, 172.31.48.232, executor 0, partition 0, PROCESS_LOCAL, 5430 bytes)
17/12/08 09:46:47 INFO TaskSetManager: Lost task 0.2 in stage 2.0 (TID 6) on 172.31.48.232, executor 0: java.lang.ClassCastException (cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD) [duplicate 2]
17/12/08 09:46:47 INFO TaskSetManager: Starting task 0.3 in stage 2.0 (TID 7, 172.31.48.232, executor 0, partition 0, PROCESS_LOCAL, 5430 bytes)
17/12/08 09:46:47 INFO TaskSetManager: Lost task 0.3 in stage 2.0 (TID 7) on 172.31.48.232, executor 0: java.lang.ClassCastException (cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD) [duplicate 3]
17/12/08 09:46:47 ERROR TaskSetManager: Task 0 in stage 2.0 failed 4 times; aborting job
17/12/08 09:46:47 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool
17/12/08 09:46:47 INFO TaskSchedulerImpl: Cancelling stage 2
17/12/08 09:46:47 INFO DAGScheduler: ResultStage 2 (show at BatchProcess.java:38) failed in 0.450 s due to Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 7, 172.31.48.232, executor 0): java.lang.ClassCastException: cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD
at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2233)
at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1405)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2288)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2282)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:428)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
17/12/08 09:46:47 INFO DAGScheduler: Job 2 failed: show at BatchProcess.java:38, took 0.476302 s
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 7, 172.31.48.232, executor 0): java.lang.ClassCastException: cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD
at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2233)
at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1405)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2288)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2282)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:428)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:336)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2853)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2153)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2153)
at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2837)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2836)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2153)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2366)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:245)
at org.apache.spark.sql.Dataset.show(Dataset.scala:646)
at org.apache.spark.sql.Dataset.show(Dataset.scala:623)
at se.cambio.sparktest.BatchProcess.startProcess(BatchProcess.java:38)
at se.cambio.sparktest.BatchProcess.main(BatchProcess.java:22)
Caused by: java.lang.ClassCastException: cannot assign instance of scala.collection.immutable.List$SerializationProxy to field org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq in instance of org.apache.spark.rdd.MapPartitionsRDD
at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2233)
at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1405)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2288)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2282)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2206)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2064)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1568)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:428)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
17/12/08 09:46:47 INFO SparkContext: Invoking stop() from shutdown hook
17/12/08 09:46:47 INFO SparkUI: Stopped Spark web UI at http://172.31.48.232:4040
17/12/08 09:46:47 INFO StandaloneSchedulerBackend: Shutting down all executors
17/12/08 09:46:47 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Asking each executor to shut down
17/12/08 09:46:48 WARN TransportChannelHandler: Exception in connection from /172.31.48.232:2521
java.io.IOException: An existing connection was forcibly closed by the remote host
at sun.nio.ch.SocketDispatcher.read0(Native Method)
at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:43)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223)
at sun.nio.ch.IOUtil.read(IOUtil.java:192)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380)
at io.netty.buffer.PooledUnsafeDirectByteBuf.setBytes(PooledUnsafeDirectByteBuf.java:221)
at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:899)
at io.netty.channel.socket.nio.NioSocketChannel.doReadBytes(NioSocketChannel.java:275)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:119)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
at java.lang.Thread.run(Thread.java:748)
17/12/08 09:46:48 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/12/08 09:46:48 INFO MemoryStore: MemoryStore cleared
17/12/08 09:46:48 INFO BlockManager: BlockManager stopped
17/12/08 09:46:48 INFO BlockManagerMaster: BlockManagerMaster stopped
17/12/08 09:46:48 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/12/08 09:46:48 INFO SparkContext: Successfully stopped SparkContext
17/12/08 09:46:48 INFO ShutdownHookManager: Shutdown hook called
17/12/08 09:46:48 INFO ShutdownHookManager: Deleting directory C:\Users\HDeSilva\AppData\Local\Temp\spark-6d584156-46ba-47a3-8e5c-95698994fb0c |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment