-
-
Save senseysensor/a8158687a48b676685c04c8d22afed73 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[hadoop@ip-172-31-1-78 ~]$ pyspark --jars /usr/share/aws/emr/ddb/lib/emr-ddb-hadoop.jar
Python 2.7.12 (default, Sep 1 2016, 22:14:00)
[GCC 4.8.3 20140911 (Red Hat 4.8.3-9)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
17/02/08 10:45:00 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
17/02/08 10:45:22 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /__ / .__/\_,_/_/ /_/\_\   version 2.1.0
      /_/
Using Python version 2.7.12 (default, Sep 1 2016 22:14:00)
SparkSession available as 'spark'.
>>> conf = {
...     "dynamodb.servicename": "dynamodb",
...     "dynamodb.input.tableName": "users",
...     "dynamodb.endpoint": "https://dynamodb.eu-central-1.amazonaws.com",
...     "dynamodb.regionid": "eu-central-1",
...     "mapred.output.format.class": "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat",
...     "mapred.input.format.class": "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat"
... }
>>> conf
{'dynamodb.regionid': 'eu-central-1', 'dynamodb.servicename': 'dynamodb', 'mapred.input.format.class': 'org.apache.hadoop.dynamodb.read.DynamoDBInputFormat', 'mapred.output.format.class': 'org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat', 'dynamodb.input.tableName': 'users', 'dynamodb.endpoint': 'https://dynamodb.eu-central-1.amazonaws.com'}
>>> sc.hadoopRDD(
...     inputFormatClass='org.apache.hadoop.dynamodb.read.DynamoDBInputFormat',
...     keyClass='org.apache.hadoop.io.Text',
...     valueClass='org.apache.hadoop.dynamodb.DynamoDBItemWritable',
...     conf=conf
... )
17/02/08 10:45:49 WARN TaskCalculator: The calculated max number of concurrent map tasks is less than 1. Use 1 instead.
17/02/08 10:45:52 ERROR TaskSetManager: Task 0.0 in stage 0.0 (TID 0) had a not serializable result: org.apache.hadoop.dynamodb.DynamoDBItemWritable
Serialization stack:
    - object not serializable (class: org.apache.hadoop.dynamodb.DynamoDBItemWritable, value: {"id":{"n":"10"},"login":{"s":"some_log_in_124324"}})
    - field (class: scala.Tuple2, name: _2, type: class java.lang.Object)
    - object (class scala.Tuple2, (,{"id":{"n":"10"},"login":{"s":"some_log_in_124324"}}))
    - element of array (index: 0)
    - array (class [Lscala.Tuple2;, size 1); not retrying
17/02/08 10:45:52 WARN ExecutorAllocationManager: No stages are running, but numRunningTasks != 0
Traceback (most recent call last):
  File "<stdin>", line 5, in <module>
  File "/usr/lib/spark/python/pyspark/context.py", line 732, in hadoopRDD
    jconf, batchSize)
  File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
  File "/usr/lib/spark/python/pyspark/sql/utils.py", line 63, in deco
    return f(*a, **kw)
  File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.hadoopRDD.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0 in stage 0.0 (TID 0) had a not serializable result: org.apache.hadoop.dynamodb.DynamoDBItemWritable
Serialization stack:
    - object not serializable (class: org.apache.hadoop.dynamodb.DynamoDBItemWritable, value: {"id":{"n":"10"},"login":{"s":"some_log_in_124324"}})
    - field (class: scala.Tuple2, name: _2, type: class java.lang.Object)
    - object (class scala.Tuple2, (,{"id":{"n":"10"},"login":{"s":"some_log_in_124324"}}))
    - element of array (index: 0)
    - array (class [Lscala.Tuple2;, size 1)
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1918)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1931)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1944)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1353)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1326)
    at org.apache.spark.api.python.SerDeUtil$.pairRDDToPython(SerDeUtil.scala:203)
    at org.apache.spark.api.python.PythonRDD$.hadoopRDD(PythonRDD.scala:651)
    at org.apache.spark.api.python.PythonRDD.hadoopRDD(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:745)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment