Skip to content

Instantly share code, notes, and snippets.

@matthewchoy88
Last active July 4, 2023 02:12
Show Gist options
  • Save matthewchoy88/d1886156bdb32216e996899f2950b2da to your computer and use it in GitHub Desktop.
Save matthewchoy88/d1886156bdb32216e996899f2950b2da to your computer and use it in GitHub Desktop.
Databricks notebook magic command to catch cell errors
from pyspark.dbutils import DBUtils
from pyspark.sql import SparkSession
import json
from IPython.core.magic import register_cell_magic
from IPython import get_ipython
from IPython.core.magics.execution import capture_output
# Register the %%job_wrapper_handle notebook magic
# This runs a cell's contents in a try-except block and, on exception, exits the notebook gracefully to the parent job.
@register_cell_magic('job_wrapper_handle')
def job_wrapper_handle(line, cell):
    """Run a cell and exit the notebook with an error payload if it fails.

    Registered as the ``%%job_wrapper_handle`` cell magic. The cell source is
    executed through IPython while stdout and stderr are captured. If the
    execution recorded an exception, the captured stdout and the exception
    text are handed to ``databricks_notebook_exit``. Otherwise the captured
    streams are replayed so the cell output is still visible.

    Args:
        line: Text following the magic name on the ``%%`` line (unused).
        cell: Source code of the cell to execute.
    """
    import sys

    # Capture stdout and stderr (but not rich display output), because
    # dbutils.notebook.exit will wipe the cell output.
    with capture_output(True, True, False) as captured:
        result = get_ipython().run_cell(cell)

    try:
        # Re-raise any exception recorded on the execution result.
        result.raise_error()
    except Exception as e:
        # Pass the message and captured output along, otherwise it is wiped.
        databricks_notebook_exit(
            status='error',
            cell_output=str(captured),
            message=f'Got exception {e}. Exiting gracefully to parent job.',
        )
    else:
        # No exception: replay what the cell printed. str(captured) only
        # covers stdout, so captured stderr is replayed separately instead of
        # being silently discarded.
        print(captured)
        if captured.stderr:
            print(captured.stderr, end='', file=sys.stderr)
# Function to get dbutils in a non-notebook .py file
def get_dbutils(spark):
    """Return a ``dbutils`` handle usable outside a notebook cell.

    Args:
        spark: Active Spark session whose configuration is inspected.

    Returns:
        ``DBUtils(spark)`` when ``spark.databricks.service.client.enabled`` is
        ``"true"``; otherwise the ``dbutils`` object found in the IPython
        user namespace.

    Raises:
        KeyError: If the IPython user namespace has no ``dbutils`` entry.
    """
    # Pass a default so an unset key selects the IPython fallback below
    # instead of raising from the config lookup.
    if spark.conf.get("spark.databricks.service.client.enabled", "false") == "true":
        return DBUtils(spark)

    import IPython
    return IPython.get_ipython().user_ns["dbutils"]
# Exit the notebook gracefully to the parent job
def databricks_notebook_exit(status, cell_output=None, message=None):
    """Exit the current notebook, returning a JSON payload to the parent job.

    Args:
        status: Status string placed in the payload (e.g. ``'error'``).
        cell_output: Captured cell output to pass along, if any.
        message: Human-readable message to pass along, if any.

    The payload also carries the org, job, parent run and task run
    identifiers read from the notebook context.
    """
    session = SparkSession.builder.getOrCreate()
    utils = get_dbutils(session)

    # Read the run details from the notebook context, which is exposed as JSON.
    context_json = (
        utils.notebook.entry_point.getDbutils().notebook().getContext().toJson()
    )
    context = json.loads(context_json)
    tags = context["tags"]

    # Data handed back to the parent job.
    result = {
        "cell_output": cell_output,
        "message": message,
        "status": status,
        "org_id": tags.get("orgId"),
        "job_id": tags.get("jobId"),
        "run_id": tags.get("multitaskParentRunId"),
        "task_run_id": context.get("currentRunId"),
    }

    # Exiting clears the cell output, so everything of interest is in the payload.
    utils.notebook.exit(json.dumps(result))
@matthewchoy88
Copy link
Author

Created for use in Databricks notebooks, to allow child jobs (created by dbutils.notebook.run()) to exit gracefully to a parent job.

This code defines a notebook magic command "%%job_wrapper_handle".
The magic can be added to the beginning of a notebook cell and will catch any exceptions, and exit gracefully to a parent job, passing any relevant info.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment