Created
July 10, 2015 22:59
-
-
Save jarutis/54c9b84d8331847f053c to your computer and use it in GitHub Desktop.
Word2vec on Spark error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"metadata":{"name":"Sentimentas","user_save_timestamp":"1970-01-01T00:00:00.000Z","auto_save_timestamp":"1970-01-01T00:00:00.000Z","language_info":{"name":"scala","file_extension":"scala","codemirror_mode":"text/x-scala"},"trusted":true,"customLocalRepo":"/home/jjarutis/.m2/repository","customRepos":null,"customDeps":["org.apache.avro % avro-mapred % 1.7.7","com.twitter % parquet-hive-bundle % 1.6.0","com.databricks:spark-avro_2.10:1.0.0","com.databricks:spark-csv_2.11:1.0.3","org.apache.spark % spark-mllib_2.10 % 1.4.0","org.deeplearning4j % dl4j-spark-nlp % 0.0.3.3.5.alpha2-SNAPSHOT","org.deeplearning4j % dl4j-spark % 0.0.3.3.5.alpha2-SNAPSHOT","org.nd4j % nd4j-java % 0.0.3.5.5.6-SNAPSHOT"],"customImports":null,"customSparkConf":{"SparkDl4jMultiLayer.AVERAGE_EACH_ITERATION":"false","Word2VecPerformer.NEGATIVE":"0","spark.akka.frameSize":"100","Word2VecPerformer.VECTOR_LENGTH":"300","spark.app.name":"Sentimentai","spark.master":"yarn-client","spark.executor.memory":"5G","spark.executor.instances":"70","spark.sql.shuffle.partitions":"400","spark.yarn.jar":"hdfs:///user/jjarutis/spark-assembly-1.4.0-hadoop2.5.0-cdh5.3.2.jar"}},"cells":[{"metadata":{"trusted":true,"input_collapsed":false,"collapsed":false},"cell_type":"code","source":"import org.apache.spark.sql.hive.HiveContext\nval hiveCtx = new HiveContext(sc)","outputs":[{"name":"stdout","output_type":"stream","text":"import org.apache.spark.sql.hive.HiveContext\nhiveCtx: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@59415f0d\n"},{"metadata":{},"data":{"text/html":"org.apache.spark.sql.hive.HiveContext@59415f0d"},"output_type":"execute_result","execution_count":24}]},{"metadata":{"trusted":true,"input_collapsed":false,"collapsed":false},"cell_type":"code","source":"hiveCtx.sql(\"use jarucio\")\nval feedback = hiveCtx.table(\"us__user_feedback\")\n .select(\"feedback\")\n .filter(\"length(feedback) > 40\")\n 
.rdd.map(_.getString(0))","outputs":[{"name":"stdout","output_type":"stream","text":"feedback: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[55] at map at <console>:58\n"},{"metadata":{},"data":{"text/html":"MapPartitionsRDD[55] at map at <console>:58"},"output_type":"execute_result","execution_count":25}]},{"metadata":{"trusted":true,"input_collapsed":false,"collapsed":false},"cell_type":"code","source":"feedback.count","outputs":[{"name":"stdout","output_type":"stream","text":"res5: Long = 785118\n"},{"metadata":{},"data":{"text/html":"785118"},"output_type":"execute_result","execution_count":26}]},{"metadata":{"trusted":true,"input_collapsed":false,"collapsed":false},"cell_type":"code","source":"import org.deeplearning4j.spark.models.embeddings.word2vec.Word2Vec\nimport org.deeplearning4j.spark.models.embeddings.word2vec.Word2VecPerformer","outputs":[{"name":"stdout","output_type":"stream","text":"import org.deeplearning4j.spark.models.embeddings.word2vec.Word2Vec\nimport org.deeplearning4j.spark.models.embeddings.word2vec.Word2VecPerformer\n"},{"metadata":{},"data":{"text/html":""},"output_type":"execute_result","execution_count":27}]},{"metadata":{"trusted":true,"input_collapsed":false,"collapsed":false},"cell_type":"code","source":"val vec = new Word2Vec()\nval table = vec.train(feedback.toJavaRDD())","outputs":[{"name":"stdout","output_type":"stream","text":"org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 13.0 failed 4 times, most recent failure: Lost task 2.3 in stage 13.0 (TID 61, hz-bd-hdp20.vinted.net): java.io.IOException: Failed to connect to hz-bd-hdp26.vinted.net/46.4.101.230:35504\n\tat org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:193)\n\tat org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:156)\n\tat 
org.apache.spark.network.netty.NettyBlockTransferService$$anon$1.createAndStart(NettyBlockTransferService.scala:88)\n\tat org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:140)\n\tat org.apache.spark.network.shuffle.RetryingBlockFetcher.access$200(RetryingBlockFetcher.java:43)\n\tat org.apache.spark.network.shuffle.RetryingBlockFetcher$1.run(RetryingBlockFetcher.java:170)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\nCaused by: java.net.ConnectException: Connection refused: hz-bd-hdp26.vinted.net/46.4.101.230:35504\n\tat sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)\n\tat sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:739)\n\tat io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:208)\n\tat io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:287)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:528)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382)\n\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354)\n\tat io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:116)\n\t... 
1 more\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1266)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1257)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1256)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1256)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)\n\tat scala.Option.foreach(Option.scala:236)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1450)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1411)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\n\n"}]},{"metadata":{"trusted":true,"input_collapsed":false,"collapsed":true},"cell_type":"code","source":"","outputs":[]}],"nbformat":4} |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.