@fjsj
Last active March 24, 2024 10:44
Recommended Celery Django settings for reliability. For more details, check the DjangoCon 2023 talk "Mixing reliability with Celery for delicious async tasks" by Flávio Juvenal: https://youtu.be/VuONiF99Oqc
# Recommended Celery Django settings for reliability:
# (use `app.config_from_object('django.conf:settings', namespace='CELERY')`
# in the proj/celery.py module, as in the sketch below)
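# A minimal sketch of proj/celery.py (assuming the Django settings module is "proj.settings"):
#
#     import os
#     from celery import Celery
#
#     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "proj.settings")
#     app = Celery("proj")
#     app.config_from_object("django.conf:settings", namespace="CELERY")
#     app.autodiscover_tasks()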
from decouple import config  # use python-decouple: https://github.com/HBNetwork/python-decouple
# Prefer RabbitMQ over Redis for Broker,
# mainly because RabbitMQ doesn't need visibility timeout. See:
# https://blog.daftcode.pl/working-with-asynchronous-celery-tasks-lessons-learned-32bb7495586b
# https://engineering.instawork.com/celery-eta-tasks-demystified-424b836e4e94
# https://github.com/celery/celery/issues/4400
CELERY_BROKER_URL = config("CELERY_BROKER_URL", default="")
# When RabbitMQ is stressed (out of memory or disk space),
# task.delay/task.apply_async calls will fail silently if confirm_publish is set to False,
# which is the default. It's better to fail loudly than silently when this happens. See:
# https://github.com/celery/celery/issues/5410
# If confirm_timeout passes, a socket.timeout exception is raised:
CELERY_BROKER_TRANSPORT_OPTIONS = {"confirm_publish": True, "confirm_timeout": 5.0}
# Some cloud providers have low broker connection limits (e.g. cloudamqp),
# so to play it safe, it's better to set broker_pool_limit to 1.
# The actual max number of connections used is equal to:
# broker_pool_limit * (web nodes * web processes + worker nodes * worker concurrency)
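# For example (hypothetical numbers): with broker_pool_limit=1, 2 web nodes with 4 web processes
# each, and 2 worker nodes with concurrency 8 each, that's 1 * (2 * 4 + 2 * 8) = 24 connections.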
CELERY_BROKER_POOL_LIMIT = config("CELERY_BROKER_POOL_LIMIT", cast=int, default=1)
# Some cloud providers require a timeout value longer than the default one
# for the broker connection (e.g. cloudamqp):
CELERY_BROKER_CONNECTION_TIMEOUT = config("CELERY_BROKER_CONNECTION_TIMEOUT", cast=float, default=30.0)
# Prefer Redis over RabbitMQ for result backend,
# as Redis is better suited to work as a key-value store:
CELERY_RESULT_BACKEND = config("CELERY_RESULT_BACKEND", default="")
# Set the maximum number of Redis connections based on what your cloud provider allows.
# Note this is specific to the result backend, so you may actually use more:
CELERY_REDIS_MAX_CONNECTIONS = config(
    "CELERY_REDIS_MAX_CONNECTIONS", cast=lambda v: int(v) if v else None, default=None
)
# THE MOST CRITICAL SETTING:
# task_acks_late = True ensures tasks are automatically re-queued
# in the event of an abrupt shutdown of the worker, such as:
# a power failure, a forced restart, or the worker instance being killed during a deployment.
# Note that you need to ensure your tasks are idempotent, to prevent repeated work (see the sketch below).
# See "Mixing reliability with Celery for delicious async tasks" talk for more details:
# https://bit.ly/celery-reliability
CELERY_TASK_ACKS_LATE = config("CELERY_TASK_ACKS_LATE", cast=bool, default=True)
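# With acks_late a task may run more than once, so it must tolerate repeats. A rough
# idempotency sketch (hypothetical model and helper, for illustration only):
#
#     @app.task
#     def send_welcome_email(user_id):
#         user = User.objects.get(pk=user_id)
#         if user.welcome_email_sent:
#             return  # already handled on a previous run, safe to skip
#         send_email(to=user.email, subject="Welcome!")
#         User.objects.filter(pk=user_id).update(welcome_email_sent=True)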
# By default, Celery will ack tasks that fail due to exceptions or that time out
# (based on task_time_limit/time_limit). In general, that's the desired behavior,
# because retries are the recommended way to handle intermittent task failures
# (see the sketch after this setting). Since explicit is better than implicit,
# we keep the default of True explicitly set here:
CELERY_TASK_ACKS_ON_FAILURE_OR_TIMEOUT = config(
    "CELERY_TASK_ACKS_ON_FAILURE_OR_TIMEOUT", cast=bool, default=True
)
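# A sketch of retrying intermittent failures with Celery's autoretry support
# (hypothetical task, assuming the requests library and an example URL):
#
#     import requests
#
#     @app.task(autoretry_for=(requests.RequestException,), retry_backoff=True, max_retries=5)
#     def fetch_exchange_rates():
#         return requests.get("https://api.example.com/rates", timeout=5).json()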
# Below we keep Celery's default for tasks abruptly killed by a signal:
# do not requeue the task. For projects with good task stability, no memory leaks,
# no hanging workers, idempotent tasks, and continuous deployments
# (workers frequently interrupted), it's safer to set this to True to avoid losing tasks:
CELERY_TASK_REJECT_ON_WORKER_LOST = config(
    "CELERY_TASK_REJECT_ON_WORKER_LOST", cast=bool, default=False
)
# Disable task prefetching for workers to increase reliability against lost tasks
# at the cost of higher peak latency. A low prefetch multiplier also prevents
# unbalanced workloads when some tasks are much slower than others.
# See: https://docs.celeryq.dev/en/latest/userguide/optimizing.html#optimizing-prefetch-limit
# Note that 0 means no prefetch limit: the worker will keep consuming messages,
# not respecting that there may be other available worker nodes.
CELERY_WORKER_PREFETCH_MULTIPLIER = config("CELERY_WORKER_PREFETCH_MULTIPLIER", cast=int, default=1)
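# For example, with worker_prefetch_multiplier=1 and worker_concurrency=8, a worker node
# reserves at most 1 * 8 = 8 unacknowledged messages at a time.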
# The number of concurrent worker processes executing tasks.
# If you’re doing mostly I/O you can have more processes, but if your tasks are mostly
# CPU-bound, it's better to leave this as None, which means one process per CPU/core:
CELERY_WORKER_CONCURRENCY = config(
    "CELERY_WORKER_CONCURRENCY", cast=lambda v: int(v) if v else None, default=None
)
# Automatically restart each worker process after 1000 tasks are processed on it.
# This is possibly overkill and introduces some latency, but it's a good safety net
# against memory leaks:
CELERY_WORKER_MAX_TASKS_PER_CHILD = config(
    "CELERY_WORKER_MAX_TASKS_PER_CHILD", cast=int, default=1000
)
# By default Celery doesn't send task events,
# but if you want to use a monitor tool like Flower, you need this:
CELERY_WORKER_SEND_TASK_EVENTS = config("CELERY_WORKER_SEND_TASK_EVENTS", cast=bool, default=True)
CELERY_EVENT_QUEUE_EXPIRES = config("CELERY_EVENT_QUEUE_EXPIRES", cast=float, default=60.0)
CELERY_EVENT_QUEUE_TTL = config("CELERY_EVENT_QUEUE_TTL", cast=float, default=5.0)
# You may also need CELERY_TASK_SEND_SENT_EVENT = True
# to track tasks before they’re consumed by a worker (but Flower doesn't support that).
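# With events enabled, Flower can monitor this app, e.g. `celery -A proj flower`
# (assuming the Celery app lives in proj/celery.py, as sketched above).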
# Below are two non-Celery settings that you should use and configure:
# always have timeouts for HTTP requests (or any kind of I/O) that happens
# inside Celery tasks. A task that blocks indefinitely may eventually
# stop the worker instance from doing any other work. See warning at:
# https://docs.celeryq.dev/en/stable/userguide/tasks.html
# Note: you must manually read from those settings and configure your HTTP client lib.
# Alternatively, you can set CELERY_TASK_SOFT_TIME_LIMIT and CELERY_TASK_TIME_LIMIT,
# but those introduce latency because they restart worker processes.
TASK_HTTP_CONNECT_TIMEOUT = config("TASK_HTTP_CONNECT_TIMEOUT", cast=float, default=5.0)
TASK_HTTP_READ_TIMEOUT = config("TASK_HTTP_READ_TIMEOUT", cast=float, default=120.0)
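# A sketch of wiring these timeouts into an HTTP call inside a task
# (assuming the requests library and a hypothetical endpoint):
#
#     import requests
#     from django.conf import settings
#
#     url = "https://api.example.com/data"  # hypothetical endpoint
#     response = requests.get(
#         url,
#         timeout=(settings.TASK_HTTP_CONNECT_TIMEOUT, settings.TASK_HTTP_READ_TIMEOUT),
#     )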
# Sources:
# - https://docs.celeryq.dev/en/stable/userguide/configuration.html
# - https://bit.ly/celery-reliability
# - https://denibertovic.com/posts/celery-best-practices/
# - https://medium.com/squad-engineering/celery-in-production-three-more-years-of-fixing-bugs-2ee462cef39f
# - https://engineering.instawork.com/celery-eta-tasks-demystified-424b836e4e94
# - https://blog.wolt.com/engineering/2021/09/15/5-tips-for-writing-production-ready-celery-tasks/
# - https://adamj.eu/tech/2020/02/03/common-celery-issues-on-django-projects/
# - https://blog.daftcode.pl/working-with-asynchronous-celery-tasks-lessons-learned-32bb7495586b
# - https://italux.medium.com/celery-rabbitmq-common-issues-a741a3800b30
# - https://stackoverflow.com/questions/66978028/application-impacts-of-celery-workers-running-with-the-without-heartbeat-fla
@viseshrp

@fjsj Thank you for the DjangoCon talk! Very useful!

@amritar

amritar commented Oct 17, 2023

Can you please provide a link to the slides from today's DjangoCon talk? Thanks for the very useful and relevant talk.

@fjsj

fjsj commented Oct 17, 2023

Hi @amritar, thanks! Talk slides are here: https://bit.ly/celery-reliability
For DjangoCon 2023 attendees, video is here: https://djangoconus2023.loudswarm.com/session/mixing-reliability-with-celery-for-delicious-async-tasks
Later on it will be available on YouTube and I will post it here.

@fjsj

fjsj commented Jan 18, 2024

Talk video now publicly available: https://youtu.be/VuONiF99Oqc
