"""Decorator-based trick to enable Python logging from PySpark executors."""
import functools
import logging
import pyspark
from typing import Callable
# Log record layout: "[P<process id>/<logger name>] <level name>: <message>".
# Including the process id makes it visible which process emitted a message.
LOG_FORMAT = "[P%(process)s/%(name)s] %(levelname)s: %(message)s"
def ensure_executor_logging(f=None, *, level=logging.INFO, format=None):
    """Decorator to enable standard Python logging from functions used in PySpark executors.

    The PySpark workers are forked processes without any Python logging setup.
    This means that standard Python logging messages are lost (even if logging
    has been set up in the PySpark driver script). Use this decorator for
    functions that are used in PySpark executor contexts, to ensure a minimal
    logging setup.

    The decorator can be applied bare (``@ensure_executor_logging``) or
    parameterized (``@ensure_executor_logging(level=logging.DEBUG)``).

    Args:
        f: The function to wrap when the decorator is applied bare; leave
            unset when passing options.
        level: Level set on the root logger when this decorator installs the
            handler; ``None`` leaves the root level untouched.
        format: Format string for the installed handler (the name mirrors the
            ``logging.basicConfig`` keyword); ``None`` selects the
            module-level ``LOG_FORMAT``.

    Returns:
        The wrapped function (bare use), or a decorator producing it
        (parameterized use).
    """

    def set_up_logging():
        """Set up logging if not already (short version of `logging.basicConfig`)."""
        root = logging.getLogger()
        if root.handlers:
            # Logging is already configured in this process: leave it alone.
            return
        handler = logging.StreamHandler()
        # The fallback format is resolved lazily, only when a handler is really installed.
        handler.setFormatter(logging.Formatter(LOG_FORMAT if format is None else format))
        root.addHandler(handler)
        if level is not None:
            root.setLevel(level)

    def decorator(f: Callable):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            # Executed in whichever process calls the function, so the setup
            # happens there; it is a no-op once a root handler exists.
            set_up_logging()
            return f(*args, **kwargs)

        return wrapped

    # Was decorator used without parenthesis or parameterized?
    return decorator(f) if callable(f) else decorator
@ensure_executor_logging(level=logging.INFO)
def manipulate(x):
    """Log the received value on the ``manipulate`` logger and return ``x * x``.

    Decorated so that a minimal logging setup exists when this function runs
    inside a PySpark executor, where the driver's logging setup is not available.
    """
    logging.getLogger("manipulate").info("got {x}".format(x=x))
    return x * x
def main():
    """Run the demo job: map `manipulate` over the numbers 0..9 and log each step."""
    log = logging.getLogger("main")

    sc = pyspark.SparkContext.getOrCreate()
    log.info("Spark context: {s!r}".format(s=sc))

    rdd = sc.parallelize(range(10))
    log.info("rdd: {r!r}".format(r=rdd))

    # `manipulate` is applied to each element by Spark; `collect` brings the
    # results back so they can be logged here.
    result = rdd.map(manipulate).collect()
    log.info("result: {r!r}".format(r=result))
if __name__ == '__main__':
    # Configures logging for the process running this script only; see
    # `ensure_executor_logging` for the executor side.
    logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
    main()

