Created
September 5, 2020 18:05
-
-
Save zorteran/8cac5652f9f30db5eec16d335cea59c2 to your computer and use it in GitHub Desktop.
Spylon kernel doesn't like MongoDB connector.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%%init_spark\n", | |
"launcher.packages = [\"com.datastax.spark:spark-cassandra-connector_2.12:3.0.0-beta\", \"org.mongodb.spark:mongo-spark-connector_2.12:3.0.0\", \"mysql:mysql-connector-java:5.1.49\"]\n", | |
"\n", | |
"launcher.conf.set(\"spark.mongodb.input.uri\", \"mongodb://root:example@mongo/school.students?authSource=admin\")\n", | |
"launcher.conf.set(\"spark.mongodb.output.uri\", \"mongodb://root:example@mongoschool.students?authSource=admin\")\n", | |
"\n", | |
"launcher.conf.set(\"spark.cassandra.connection.host\", \"cassandra\")\n", | |
"launcher.conf.set(\"spark.cassandra.auth.username\", \"cassandra\")\n", | |
"launcher.conf.set(\"spark.cassandra.auth.password\", \"cassandra\")\n", | |
"launcher.conf.set(\"spark.sql.extensions\", \"com.datastax.spark.connector.CassandraSparkExtensions\")\n", | |
"launcher.conf.set(\"spark.sql.catalog.casscatalog\",\"com.datastax.spark.connector.datasource.CassandraCatalog\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Read from MySql" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"groups: org.apache.spark.sql.DataFrame = [group_id: int, group_number: string ... 2 more fields]\n" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"val groups = spark.read\n", | |
" .format(\"jdbc\")\n", | |
" .option(\"url\", \"jdbc:mysql://mysql/school\")\n", | |
" .option(\"driver\", \"com.mysql.jdbc.Driver\")\n", | |
" .option(\"dbtable\", \"groups\")\n", | |
" .option(\"user\", \"user\")\n", | |
" .option(\"password\", \"password\")\n", | |
" .load()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"+--------+------------+-----------+-------------------+\n", | |
"|group_id|group_number|description|something_important|\n", | |
"+--------+------------+-----------+-------------------+\n", | |
"| 1| 1A| some group| 100|\n", | |
"| 2| 2A| some group| 102|\n", | |
"| 3| 3A| some group| 101|\n", | |
"| 4| 1B| some group| 123|\n", | |
"| 5| 2B| some group| 133|\n", | |
"| 6| 3B| some group| 144|\n", | |
"+--------+------------+-----------+-------------------+\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"groups.show" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Read from Mongodb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "org.apache.spark.SparkException", | |
"evalue": " Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, 50146e75cc08, executor driver): java.lang.ClassCastException: class com.mongodb.spark.MongoConnector cannot be cast to class com.mongodb.spark.MongoConnector (com.mongodb.spark.MongoConnector is in unnamed module of loader scala.reflect.internal.util.ScalaClassLoader$URLClassLoader @5a06eee2; com.mongodb.spark.MongoConnector is in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @3af37506)", | |
"output_type": "error", | |
"traceback": [ | |
"org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, 50146e75cc08, executor driver): java.lang.ClassCastException: class com.mongodb.spark.MongoConnector cannot be cast to class com.mongodb.spark.MongoConnector (com.mongodb.spark.MongoConnector is in unnamed module of loader scala.reflect.internal.util.ScalaClassLoader$URLClassLoader @5a06eee2; com.mongodb.spark.MongoConnector is in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @3af37506)", | |
"\tat com.mongodb.spark.rdd.MongoRDD.compute(MongoRDD.scala:160)", | |
"\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)", | |
"\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:313)", | |
"\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)", | |
"\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)", | |
"\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:313)", | |
"\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)", | |
"\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)", | |
"\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:313)", | |
"\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)", | |
"\tat org.apache.spark.scheduler.Task.run(Task.scala:127)", | |
"\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)", | |
"\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)", | |
"\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)", | |
"\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)", | |
"\tat java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)", | |
"\tat java.base/java.lang.Thread.run(Thread.java:834)", | |
"", | |
"Driver stacktrace:", | |
" at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2023)", | |
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1972)", | |
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1971)", | |
" at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)", | |
" at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)", | |
" at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)", | |
" at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1971)", | |
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:950)", | |
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:950)", | |
" at scala.Option.foreach(Option.scala:407)", | |
" at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:950)", | |
" at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2203)", | |
" at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2152)", | |
" at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2141)", | |
" at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)", | |
" at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:752)", | |
" at org.apache.spark.SparkContext.runJob(SparkContext.scala:2093)", | |
" at org.apache.spark.SparkContext.runJob(SparkContext.scala:2188)", | |
" at org.apache.spark.rdd.RDD.$anonfun$fold$1(RDD.scala:1157)", | |
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)", | |
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)", | |
" at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)", | |
" at org.apache.spark.rdd.RDD.fold(RDD.scala:1151)", | |
" at org.apache.spark.rdd.RDD.$anonfun$treeAggregate$1(RDD.scala:1220)", | |
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)", | |
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)", | |
" at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)", | |
" at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1196)", | |
" at com.mongodb.spark.sql.MongoInferSchema$.apply(MongoInferSchema.scala:88)", | |
" at com.mongodb.spark.MongoSpark.toDF(MongoSpark.scala:587)", | |
" at com.mongodb.spark.rdd.MongoRDD.toDF(MongoRDD.scala:72)", | |
" ... 37 elided", | |
"Caused by: java.lang.ClassCastException: class com.mongodb.spark.MongoConnector cannot be cast to class com.mongodb.spark.MongoConnector (com.mongodb.spark.MongoConnector is in unnamed module of loader scala.reflect.internal.util.ScalaClassLoader$URLClassLoader @5a06eee2; com.mongodb.spark.MongoConnector is in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @3af37506)", | |
" at com.mongodb.spark.rdd.MongoRDD.compute(MongoRDD.scala:160)", | |
" at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)", | |
" at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)", | |
" at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)", | |
" at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)", | |
" at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)", | |
" at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)", | |
" at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)", | |
" at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)", | |
" at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)", | |
" at org.apache.spark.scheduler.Task.run(Task.scala:127)", | |
" at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)", | |
" at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)", | |
" at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)", | |
" at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)", | |
" at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)", | |
" ... 1 more", | |
"" | |
] | |
} | |
], | |
"source": [ | |
"import com.mongodb.spark._\n", | |
"val students_rdd = MongoSpark.load(sc)\n", | |
"val studets = students_rdd.toDF" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"students.show" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "spylon-kernel", | |
"language": "scala", | |
"name": "spylon-kernel" | |
}, | |
"language_info": { | |
"codemirror_mode": "text/x-scala", | |
"file_extension": ".scala", | |
"help_links": [ | |
{ | |
"text": "MetaKernel Magics", | |
"url": "https://metakernel.readthedocs.io/en/latest/source/README.html" | |
} | |
], | |
"mimetype": "text/x-scala", | |
"name": "scala", | |
"pygments_lexer": "scala", | |
"version": "0.4.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment