Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Decorator based trick to enable Python logging from PySpark executors
import functools
import logging
import pyspark
from typing import Callable
LOG_FORMAT = "[P%(process)s/%(name)s] %(levelname)s: %(message)s"


def ensure_executor_logging(f=None, *, level=logging.INFO):
    """
    Decorator to enable standard Python logging from functions that are used
    in PySpark executors.

    The PySpark workers are forked processes without any Python logging setup.
    This means that standard Python logging messages are lost (even if logging
    has been set up in the PySpark driver script). Use this decorator for
    functions that are used in PySpark executor contexts, to ensure a minimal
    logging setup.

    :param f: function to wrap (when used as bare ``@ensure_executor_logging``)
    :param level: root logger level to set when we do the setup ourselves;
        pass ``None`` to leave the level untouched.
    """
    def set_up_logging():
        """Set up logging if not already (short version of `logging.basicConfig`)"""
        root = logging.getLogger()
        # Only touch the root logger when nothing else configured it yet,
        # so driver-side logging setup is never clobbered.
        if len(root.handlers) == 0:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT))
            root.addHandler(handler)
            if level is not None:
                root.setLevel(level)

    def decorator(f: Callable):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            # Lazy setup at call time: this runs inside the executor process.
            set_up_logging()
            return f(*args, **kwargs)
        return wrapped

    # Was decorator used without parenthesis or parameterized?
    return decorator(f) if callable(f) else decorator
def manipulate(x):
    """Example executor-side transform: log the input and return its square."""
    logger = logging.getLogger("manipulate")
    logger.info("got {x}".format(x=x))
    return x * x
def main():
    """
    Driver entry point: run a trivial RDD job so that `manipulate` executes in
    executor processes, demonstrating the executor-side logging setup.
    """
    log = logging.getLogger("main")
    sc = pyspark.SparkContext.getOrCreate()
    log.info("Spark context: {s!r}".format(s=sc))
    rdd = sc.parallelize(range(10))
    log.info("rdd: {r!r}".format(r=rdd))
    # Triggers execution of `manipulate` on the executors.
    result = rdd.map(manipulate).collect()
    log.info("result: {r!r}".format(r=result))
if __name__ == '__main__':
    # Driver-side logging setup; executors get theirs via the decorator.
    logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
    main()

Comment by @soxofaan (gist owner), Apr 8, 2020:

created a package for this now (presumably `epsel` on PyPI — the link was lost in extraction; verify)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment