Last active
July 4, 2023 02:12
-
-
Save matthewchoy88/d1886156bdb32216e996899f2950b2da to your computer and use it in GitHub Desktop.
Databricks notebook magic command to catch cell errors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.dbutils import DBUtils | |
from pyspark.sql import SparkSession | |
import json | |
from IPython.core.magic import register_cell_magic | |
from IPython import get_ipython | |
from IPython.core.magics.execution import capture_output | |
# Register the %%job_wrapper_handle notebook magic.
# It runs a cell's contents and, if the cell raised, exits the notebook
# gracefully to the parent job instead of letting the exception propagate.
@register_cell_magic('job_wrapper_handle')
def job_wrapper_handle(line, cell):
    """Run ``cell`` and route any exception to ``databricks_notebook_exit``.

    Args:
        line: Text that follows ``%%job_wrapper_handle`` on the magic line.
            It is not used.
        cell: Source code of the cell body to execute.

    On success, the captured stdout is printed and any captured stderr is
    replayed to ``sys.stderr``. On failure, ``databricks_notebook_exit`` is
    called with ``status='error'``, ``str(io)`` as the cell output and a
    message that embeds the exception.
    """
    import sys  # local import: only needed to replay captured stderr

    # Capture stdout and stderr (rich display output is not captured) so the
    # text can be forwarded on failure; dbutils.notebook.exit wipes cell output.
    with capture_output(True, True, False) as io:
        result = get_ipython().run_cell(cell)

    try:
        # Re-raise whatever error the cell execution recorded, if any.
        result.raise_error()
    except Exception as e:
        # Pass message and cell output along, otherwise the output is wiped.
        databricks_notebook_exit(
            status='error',
            cell_output=str(io),
            message=f'Got exception {e}. Exiting gracefully to parent job.',
        )
    else:
        # No exception: show what the cell wrote while output was captured.
        print(io)
        # stderr was captured too; replay it so warnings/log lines are not lost.
        if io.stderr:
            print(io.stderr, end='', file=sys.stderr)
# Function to get dbutils in a non-notebook .py file
def get_dbutils(spark):
    """Return a ``dbutils`` object for the given Spark session.

    Args:
        spark: Session whose ``conf`` is checked for the
            ``spark.databricks.service.client.enabled`` setting.

    Returns:
        ``DBUtils(spark)`` when that setting equals ``"true"``; otherwise the
        ``dbutils`` entry of the running IPython shell's user namespace.
    """
    # Supply a default: conf.get() without one raises when the key is unset,
    # which would prevent the IPython fallback below from ever being reached.
    client_enabled = spark.conf.get(
        "spark.databricks.service.client.enabled", "false"
    )
    if client_enabled == "true":
        return DBUtils(spark)

    import IPython
    return IPython.get_ipython().user_ns["dbutils"]
# Exit the notebook gracefully to the parent job
def databricks_notebook_exit(status, cell_output=None, message=None):
    """Exit the notebook, returning a JSON payload of run details.

    Args:
        status: Value stored under the ``"status"`` key of the payload.
        cell_output: Optional value stored under ``"cell_output"``.
        message: Optional value stored under ``"message"``.

    The payload also carries ``org_id``, ``job_id`` and ``run_id`` (read from
    the notebook context's ``tags``) and ``task_run_id`` (read from
    ``currentRunId``); identifiers absent from the context are ``None``.
    The JSON-encoded payload is passed to ``dbutils.notebook.exit``.
    """
    spark = SparkSession.builder.getOrCreate()
    dbutils = get_dbutils(spark)

    # Read the notebook context so job run details can be handed to the parent.
    notebook_context = json.loads(
        dbutils.notebook.entry_point.getDbutils().notebook().getContext().toJson()
    )
    # Tolerate a missing or null "tags" entry: a KeyError raised here would
    # replace the graceful exit (and the original cell error) with a new crash.
    tags = notebook_context.get("tags") or {}

    # Data to return to parent job
    payload = {
        "cell_output": cell_output,
        "message": message,
        "status": status,
        "org_id": tags.get("orgId"),
        "job_id": tags.get("jobId"),
        "run_id": tags.get("multitaskParentRunId"),
        "task_run_id": notebook_context.get("currentRunId"),
    }

    # This will clear cell output
    dbutils.notebook.exit(json.dumps(payload))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Created for use in Databricks notebooks, to allow child jobs (created by dbutils.notebook.run()) to exit gracefully to a parent job.
This code defines a notebook magic command "%%job_wrapper_handle".
The magic can be added to the beginning of a notebook cell and will catch any exceptions, and exit gracefully to a parent job, passing any relevant info.