Last active
October 15, 2020 14:34
-
-
Save zmjjmz/52e3d15fd2efee52ff20c5a0b76a7cdd to your computer and use it in GitHub Desktop.
glue_devendpt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Glue dev-endpoint smoke test: read one partition-hour of the
`okc-ml.second_votes` catalog table via a pushdown predicate, timing the read.

Intended to run in an AWS Glue dev endpoint / Livy notebook session.
"""
import sys
import time

from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql import functions as sf
from pyspark.sql import types as st
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.functions import udf, from_json, col, coalesce

## @params: [JOB_NAME]
#args = getResolvedOptions(sys.argv, ['JOB_NAME'])

# Fix: the original referenced `sc` while its construction was commented out,
# which raises NameError outside a notebook session that pre-defines `sc`.
# getOrCreate() reuses the session's context when one exists and creates one
# otherwise, so the script works in both environments.
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
#job.init(args['JOB_NAME'], args)

## @type: DataSource
## @args: [database = "okc-ml", table_name = "second_votes", transformation_ctx = "datasource0"]
## @return: datasource0
## @inputs: []

# Restrict the catalog scan to a single partition-hour.
# NOTE(review): if the partition columns are string-typed in the Glue catalog,
# `month==02` may fail to match a partition value of '02' — confirm the
# partition key types and quote the values if necessary.
predicate = '(year==2019) and (month==02) and (day==26) and (hour==22)'
partition_keys = ['year', 'month', 'day', 'hour']

# Wall-clock timings, keyed by pipeline stage name.
times = {}
read_tic = time.time()
datasource0 = glueContext.create_dynamic_frame.from_catalog(
    database="okc-ml",
    table_name="second_votes",
    transformation_ctx="datasource0",
    push_down_predicate=predicate,
)
times['read'] = time.time() - read_tic
datasource0.printSchema()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Starting Spark application | |
ID YARN Application ID Kind State Spark UI Driver log Current session? | |
0 application_1554413049589_0001 pyspark idle Link Link ✔ | |
SparkSession available as 'spark'. | |
An error was encountered: | |
Invalid status code '400' from http://localhost:8998/sessions/0/statements/1 with error payload: "requirement failed: Session isn't active." | |
An error was encountered: | |
Session 0 unexpectedly reached final status 'dead'. See logs: | |
stdout: | |
stderr: | |
SLF4J: Class path contains multiple SLF4J bindings. | |
SLF4J: Found binding in [jar:file:/usr/share/aws/glue/etl/jars/glue-assembly.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: Found binding in [jar:file:/usr/lib/spark/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. | |
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] | |
19/04/04 21:57:56 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
19/04/04 21:57:57 INFO RMProxy: Connecting to ResourceManager at ip-172-32-146-183.ec2.internal/172.32.146.183:8032 | |
19/04/04 21:57:57 INFO Client: Requesting a new application from cluster with 1 NodeManagers | |
19/04/04 21:57:57 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (12288 MB per container) | |
19/04/04 21:57:57 INFO Client: Will allocate AM container, with 1408 MB memory including 384 MB overhead | |
19/04/04 21:57:57 INFO Client: Setting up container launch context for our AM | |
19/04/04 21:57:57 INFO Client: Setting up the launch environment for our AM container | |
19/04/04 21:57:57 INFO Client: Preparing resources for our AM container | |
19/04/04 21:57:58 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME. | |
19/04/04 21:58:00 INFO Client: Uploading resource file:/mnt/tmp/spark-8fd0c4f5-49ac-4572-ac25-121811033b0b/__spark_libs__9052615244458024117.zip -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/__spark_libs__9052615244458024117.zip | |
19/04/04 21:58:01 INFO Client: Uploading resource file:/usr/lib/livy/rsc-jars/livy-api-0.4.0-incubating.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/livy-api-0.4.0-incubating.jar | |
19/04/04 21:58:01 INFO Client: Uploading resource file:/usr/lib/livy/rsc-jars/livy-rsc-0.4.0-incubating.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/livy-rsc-0.4.0-incubating.jar | |
19/04/04 21:58:01 INFO Client: Uploading resource file:/usr/lib/livy/rsc-jars/netty-all-4.0.29.Final.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/netty-all-4.0.29.Final.jar | |
19/04/04 21:58:01 INFO Client: Uploading resource file:/usr/share/aws/glue/etl/jars/glue-assembly.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/glue-assembly.jar | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/local/lib/python2.7/site-packages/sagemaker_pyspark/jars/sagemaker-spark_2.11-spark_2.2.1-1.0.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/sagemaker-spark_2.11-spark_2.2.1-1.0.jar | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/lib/livy/repl_2.11-jars/livy-repl_2.11-0.4.0-incubating.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/livy-repl_2.11-0.4.0-incubating.jar | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/lib/livy/repl_2.11-jars/livy-core_2.11-0.4.0-incubating.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/livy-core_2.11-0.4.0-incubating.jar | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/lib/livy/repl_2.11-jars/commons-codec-1.9.jar -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/commons-codec-1.9.jar | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/share/aws/glue/etl/python/PyGlue.zip -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/PyGlue.zip | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/lib/spark/python/lib/pyspark.zip -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/pyspark.zip | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/usr/lib/spark/python/lib/py4j-0.10.4-src.zip -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/py4j-0.10.4-src.zip | |
19/04/04 21:58:03 INFO Client: Uploading resource file:/mnt/tmp/spark-8fd0c4f5-49ac-4572-ac25-121811033b0b/__spark_conf__2026243233940390355.zip -> hdfs://ip-172-32-146-183.ec2.internal:8020/user/livy/.sparkStaging/application_1554413049589_0001/__spark_conf__.zip | |
19/04/04 21:58:03 INFO SecurityManager: Changing view acls to: livy | |
19/04/04 21:58:03 INFO SecurityManager: Changing modify acls to: livy | |
19/04/04 21:58:03 INFO SecurityManager: Changing view acls groups to: | |
19/04/04 21:58:03 INFO SecurityManager: Changing modify acls groups to: | |
19/04/04 21:58:03 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(livy); groups with view permissions: Set(); users with modify permissions: Set(livy); groups with modify permissions: Set() | |
19/04/04 21:58:03 INFO Client: Submitting application application_1554413049589_0001 to ResourceManager | |
19/04/04 21:58:03 INFO YarnClientImpl: Submitted application application_1554413049589_0001 | |
19/04/04 21:58:03 INFO Client: Application report for application_1554413049589_0001 (state: ACCEPTED) | |
19/04/04 21:58:03 INFO Client: | |
client token: N/A | |
diagnostics: N/A | |
ApplicationMaster host: N/A | |
ApplicationMaster RPC port: -1 | |
queue: default | |
start time: 1554415083804 | |
final status: UNDEFINED | |
tracking URL: http://ip-172-32-146-183.ec2.internal:20888/proxy/application_1554413049589_0001/ | |
user: livy | |
19/04/04 21:58:03 INFO ShutdownHookManager: Shutdown hook called | |
19/04/04 21:58:03 INFO ShutdownHookManager: Deleting directory /mnt/tmp/spark-8fd0c4f5-49ac-4572-ac25-121811033b0b | |
YARN Diagnostics: | |
Application application_1554413049589_0001 failed 1 times due to AM Container for appattempt_1554413049589_0001_000001 exited with exitCode: -104 | |
For more detailed output, check application tracking page:http://ip-172-32-146-183.ec2.internal:8088/cluster/app/application_1554413049589_0001Then, click on links to logs of each attempt. | |
Diagnostics: Container [pid=12807,containerID=container_1554413049589_0001_01_000001] is running beyond physical memory limits. Current usage: 1.4 GB of 1.4 GB physical memory used; 3.6 GB of 6.9 GB virtual memory used. Killing container. | |
Dump of the process-tree for container_1554413049589_0001_01_000001 : | |
|- PID PPID PGRPID SESSID CMD_NAME USER_MODE_TIME(MILLIS) SYSTEM_TIME(MILLIS) VMEM_USAGE(BYTES) RSSMEM_USAGE(PAGES) FULL_CMD_LINE | |
|- 12825 12807 12807 12807 (java) 10160 193 3257659392 353507 /usr/lib/jvm/java-openjdk/bin/java -server -Xmx1024m -Djava.io.tmpdir=/mnt/yarn/usercache/livy/appcache/application_1554413049589_0001/container_1554413049589_0001_01_000001/tmp -DRDS_TRUSTSTORE_URL=file:///usr/share/aws/glue/RDSTrustStore.jks -Djavax.net.ssl.trustStorePassword=amazon -Djavax.net.ssl.trustStoreType=JKS -Djavax.net.ssl.trustStore=/usr/lib/jvm/java/jre/lib/security/cacerts -DREDSHIFT_ROOT_CERT_PATH=/usr/share/aws/glue/redshift-ssl-ca-cert.pem -DRDS_ROOT_CERT_PATH=/usr/share/aws/glue/rds-combined-ca-bundle.pem -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError=kill -9 %p -Dspark.yarn.app.container.log.dir=/var/log/hadoop-yarn/containers/application_1554413049589_0001/container_1554413049589_0001_01_000001 org.apache.spark.deploy.yarn.ApplicationMaster --class org.apache.livy.rsc.driver.RSCDriverBootstrapper --properties-file /mnt/yarn/usercache/livy/appcache/application_1554413049589_0001/container_1554413049589_0001_01_000001/__spark_conf__/__spark_conf__.properties | |
|- 12807 12805 12807 12807 (bash) 0 0 115822592 688 /bin/bash -c LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native:::/usr/lib/hadoop-lzo/lib/native:/usr/lib/hadoop/lib/native::/usr/lib/hadoop-lzo/lib/native:/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native:/usr/lib/hadoop/lib/native /usr/lib/jvm/java-openjdk/bin/java -server -Xmx1024m -Djava.io.tmpdir=/mnt/yarn/usercache/livy/appcache/application_1554413049589_0001/container_1554413049589_0001_01_000001/tmp '-DRDS_TRUSTSTORE_URL=file:///usr/share/aws/glue/RDSTrustStore.jks' '-Djavax.net.ssl.trustStorePassword=amazon' '-Djavax.net.ssl.trustStoreType=JKS' '-Djavax.net.ssl.trustStore=/usr/lib/jvm/java/jre/lib/security/cacerts' '-DREDSHIFT_ROOT_CERT_PATH=/usr/share/aws/glue/redshift-ssl-ca-cert.pem' '-DRDS_ROOT_CERT_PATH=/usr/share/aws/glue/rds-combined-ca-bundle.pem' '-XX:+UseConcMarkSweepGC' '-XX:CMSInitiatingOccupancyFraction=70' '-XX:MaxHeapFreeRatio=70' '-XX:+CMSClassUnloadingEnabled' '-XX:OnOutOfMemoryError=kill -9 %p' -Dspark.yarn.app.container.log.dir=/var/log/hadoop-yarn/containers/application_1554413049589_0001/container_1554413049589_0001_01_000001 org.apache.spark.deploy.yarn.ApplicationMaster --class 'org.apache.livy.rsc.driver.RSCDriverBootstrapper' --properties-file /mnt/yarn/usercache/livy/appcache/application_1554413049589_0001/container_1554413049589_0001_01_000001/__spark_conf__/__spark_conf__.properties 1> /var/log/hadoop-yarn/containers/application_1554413049589_0001/container_1554413049589_0001_01_000001/stdout 2> /var/log/hadoop-yarn/containers/application_1554413049589_0001/container_1554413049589_0001_01_000001/stderr | |
|- 12870 12825 12807 12807 (python) 48 6 443006976 8066 python /mnt/yarn/usercache/livy/appcache/application_1554413049589_0001/container_1554413049589_0001_01_000001/tmp/7058602018821482371 | |
Container killed on request. Exit code is 143 | |
Container exited with a non-zero exit code 143 | |
Failing this attempt. Failing the application. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment