Skip to content

Instantly share code, notes, and snippets.

@zorteran
Created September 5, 2020 18:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zorteran/8cac5652f9f30db5eec16d335cea59c2 to your computer and use it in GitHub Desktop.
Save zorteran/8cac5652f9f30db5eec16d335cea59c2 to your computer and use it in GitHub Desktop.
Spylon kernel doesn't like MongoDB connector.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%%init_spark\n",
"launcher.packages = [\"com.datastax.spark:spark-cassandra-connector_2.12:3.0.0-beta\", \"org.mongodb.spark:mongo-spark-connector_2.12:3.0.0\", \"mysql:mysql-connector-java:5.1.49\"]\n",
"\n",
"launcher.conf.set(\"spark.mongodb.input.uri\", \"mongodb://root:example@mongo/school.students?authSource=admin\")\n",
"launcher.conf.set(\"spark.mongodb.output.uri\", \"mongodb://root:example@mongo/school.students?authSource=admin\")\n",
"\n",
"launcher.conf.set(\"spark.cassandra.connection.host\", \"cassandra\")\n",
"launcher.conf.set(\"spark.cassandra.auth.username\", \"cassandra\")\n",
"launcher.conf.set(\"spark.cassandra.auth.password\", \"cassandra\")\n",
"launcher.conf.set(\"spark.sql.extensions\", \"com.datastax.spark.connector.CassandraSparkExtensions\")\n",
"launcher.conf.set(\"spark.sql.catalog.casscatalog\",\"com.datastax.spark.connector.datasource.CassandraCatalog\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read from MySql"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"groups: org.apache.spark.sql.DataFrame = [group_id: int, group_number: string ... 2 more fields]\n"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"val groups = spark.read\n",
" .format(\"jdbc\")\n",
" .option(\"url\", \"jdbc:mysql://mysql/school\")\n",
" .option(\"driver\", \"com.mysql.jdbc.Driver\")\n",
" .option(\"dbtable\", \"groups\")\n",
" .option(\"user\", \"user\")\n",
" .option(\"password\", \"password\")\n",
" .load()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------+------------+-----------+-------------------+\n",
"|group_id|group_number|description|something_important|\n",
"+--------+------------+-----------+-------------------+\n",
"| 1| 1A| some group| 100|\n",
"| 2| 2A| some group| 102|\n",
"| 3| 3A| some group| 101|\n",
"| 4| 1B| some group| 123|\n",
"| 5| 2B| some group| 133|\n",
"| 6| 3B| some group| 144|\n",
"+--------+------------+-----------+-------------------+\n",
"\n"
]
}
],
"source": [
"groups.show"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read from Mongodb"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "org.apache.spark.SparkException",
"evalue": " Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, 50146e75cc08, executor driver): java.lang.ClassCastException: class com.mongodb.spark.MongoConnector cannot be cast to class com.mongodb.spark.MongoConnector (com.mongodb.spark.MongoConnector is in unnamed module of loader scala.reflect.internal.util.ScalaClassLoader$URLClassLoader @5a06eee2; com.mongodb.spark.MongoConnector is in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @3af37506)",
"output_type": "error",
"traceback": [
"org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, 50146e75cc08, executor driver): java.lang.ClassCastException: class com.mongodb.spark.MongoConnector cannot be cast to class com.mongodb.spark.MongoConnector (com.mongodb.spark.MongoConnector is in unnamed module of loader scala.reflect.internal.util.ScalaClassLoader$URLClassLoader @5a06eee2; com.mongodb.spark.MongoConnector is in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @3af37506)",
"\tat com.mongodb.spark.rdd.MongoRDD.compute(MongoRDD.scala:160)",
"\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)",
"\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:313)",
"\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)",
"\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)",
"\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:313)",
"\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)",
"\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)",
"\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:313)",
"\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)",
"\tat org.apache.spark.scheduler.Task.run(Task.scala:127)",
"\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)",
"\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)",
"\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)",
"\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)",
"\tat java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)",
"\tat java.base/java.lang.Thread.run(Thread.java:834)",
"",
"Driver stacktrace:",
" at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2023)",
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1972)",
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1971)",
" at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)",
" at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)",
" at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)",
" at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1971)",
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:950)",
" at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:950)",
" at scala.Option.foreach(Option.scala:407)",
" at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:950)",
" at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2203)",
" at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2152)",
" at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2141)",
" at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)",
" at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:752)",
" at org.apache.spark.SparkContext.runJob(SparkContext.scala:2093)",
" at org.apache.spark.SparkContext.runJob(SparkContext.scala:2188)",
" at org.apache.spark.rdd.RDD.$anonfun$fold$1(RDD.scala:1157)",
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)",
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)",
" at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)",
" at org.apache.spark.rdd.RDD.fold(RDD.scala:1151)",
" at org.apache.spark.rdd.RDD.$anonfun$treeAggregate$1(RDD.scala:1220)",
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)",
" at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)",
" at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)",
" at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1196)",
" at com.mongodb.spark.sql.MongoInferSchema$.apply(MongoInferSchema.scala:88)",
" at com.mongodb.spark.MongoSpark.toDF(MongoSpark.scala:587)",
" at com.mongodb.spark.rdd.MongoRDD.toDF(MongoRDD.scala:72)",
" ... 37 elided",
"Caused by: java.lang.ClassCastException: class com.mongodb.spark.MongoConnector cannot be cast to class com.mongodb.spark.MongoConnector (com.mongodb.spark.MongoConnector is in unnamed module of loader scala.reflect.internal.util.ScalaClassLoader$URLClassLoader @5a06eee2; com.mongodb.spark.MongoConnector is in unnamed module of loader org.apache.spark.util.MutableURLClassLoader @3af37506)",
" at com.mongodb.spark.rdd.MongoRDD.compute(MongoRDD.scala:160)",
" at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)",
" at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)",
" at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)",
" at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)",
" at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)",
" at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)",
" at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)",
" at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)",
" at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)",
" at org.apache.spark.scheduler.Task.run(Task.scala:127)",
" at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)",
" at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)",
" at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)",
" at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)",
" at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)",
" ... 1 more",
""
]
}
],
"source": [
"import com.mongodb.spark._\n",
"val students_rdd = MongoSpark.load(sc)\n",
"val students = students_rdd.toDF"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"students.show"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "spylon-kernel",
"language": "scala",
"name": "spylon-kernel"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"help_links": [
{
"text": "MetaKernel Magics",
"url": "https://metakernel.readthedocs.io/en/latest/source/README.html"
}
],
"mimetype": "text/x-scala",
"name": "scala",
"pygments_lexer": "scala",
"version": "0.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment