from airflow import DAG
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from datetime import datetime, timedelta

args = {
    'owner': 'airflow',
    'start_date': datetime(2018, 5, 24)
}

# Submit test.py every 10 minutes via spark-submit.
dag = DAG('spark_job', default_args=args, schedule_interval="*/10 * * * *")

operator = SparkSubmitOperator(
    task_id='spark_submit_job',
    application='/home/ubuntu/test.py',
    total_executor_cores='1',
    executor_cores='1',
    executor_memory='2g',
    num_executors='1',
    name='airflow-spark',
    verbose=False,
    driver_memory='1g',
    # The master URL is passed through conf here, which triggers the error below.
    conf={'master': 'spark://xx.xx.xx.xx:7077'},
    dag=dag,
)
Error:

ubuntu@ip-172-31-1-88:~/airflow$ airflow test spark_job spark_submit_job 2015-05-25
/home/ubuntu/.local/lib/python2.7/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.
  """)
[2018-05-25 14:52:16,026] {driver.py:120} INFO - Generating grammar tables from /usr/lib/python2.7/lib2to3/Grammar.txt
[2018-05-25 14:52:16,059] {driver.py:120} INFO - Generating grammar tables from /usr/lib/python2.7/lib2to3/PatternGrammar.txt
[2018-05-25 14:52:16,157] {__init__.py:45} INFO - Using executor SequentialExecutor
[2018-05-25 14:52:16,194] {models.py:189} INFO - Filling up the DagBag from /home/ubuntu/airflow/dags
[2018-05-25 14:52:16,248] {base_hook.py:80} INFO - Using connection to: yarn
[2018-05-25 14:52:17,031] {spark_submit_hook.py:260} INFO - Warning: Ignoring non-spark config property: master=spark://xx.xx.xx.xx:7077
[2018-05-25 14:52:17,065] {spark_submit_hook.py:260} INFO - Exception in thread "main" java.lang.Exception: When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.
[2018-05-25 14:52:17,066] {spark_submit_hook.py:260} INFO - at org.apache.spark.deploy.SparkSubmitArguments.validateSubmitArguments(SparkSubmitArguments.scala:286)
[2018-05-25 14:52:17,066] {spark_submit_hook.py:260} INFO - at org.apache.spark.deploy.SparkSubmitArguments.validateArguments(SparkSubmitArguments.scala:246)
[2018-05-25 14:52:17,066] {spark_submit_hook.py:260} INFO - at org.apache.spark.deploy.SparkSubmitArguments.<init>(SparkSubmitArguments.scala:119)
[2018-05-25 14:52:17,066] {spark_submit_hook.py:260} INFO - at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:129)
[2018-05-25 14:52:17,066] {spark_submit_hook.py:260} INFO - at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Traceback (most recent call last):
  File "/usr/local/bin/airflow", line 27, in <module>
    args.func(args)
  File "/usr/local/lib/python2.7/dist-packages/airflow/bin/cli.py", line 528, in test
    ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
  File "/usr/local/lib/python2.7/dist-packages/airflow/utils/db.py", line 50, in wrapper
    result = func(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/airflow/models.py", line 1584, in run
    session=session)
  File "/usr/local/lib/python2.7/dist-packages/airflow/utils/db.py", line 50, in wrapper
    result = func(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/airflow/models.py", line 1493, in _run_raw_task
    result = task_copy.execute(context=context)
  File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/operators/spark_submit_operator.py", line 145, in execute
    self._hook.submit(self._application)
  File "/usr/local/lib/python2.7/dist-packages/airflow/contrib/hooks/spark_submit_hook.py", line 239, in submit
    spark_submit_cmd, returncode
airflow.exceptions.AirflowException: Cannot execute: ['spark-submit', '--master', u'yarn', '--conf', 'master=spark://xx.xx.xx.xx:7077', '--num-executors', '1', '--total-executor-cores', '1', '--executor-cores', '1', '--executor-memory', '2g', '--driver-memory', '1g', '--name', u'airflow-spark', '--queue', u'root.default', '/home/ubuntu/test.py']. Error code is: 1.
Hi. How did the error get resolved? Could you please provide working code for the same? I am trying to do a spark-submit with master = local.
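For a local master the same pattern should work: point the connection's host at local[*] instead of a standalone URL. A minimal sketch, assuming a hypothetical connection spark_local (conn type: Spark, host: local[*]) created in the Airflow UI:

from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator

# 'spark_local' is a hypothetical connection; with host local[*] the hook
# builds the command as: spark-submit --master local[*] ...
local_job = SparkSubmitOperator(
    task_id='spark_submit_local',
    application='/home/ubuntu/test.py',
    conn_id='spark_local',
    executor_memory='2g',
    driver_memory='1g',
    name='airflow-spark-local',
    dag=dag,  # a DAG object defined as in the sketch above
)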