Last active
February 3, 2026 07:01
-
-
Save bartTC/c768065be5f8073911e06519beccd814 to your computer and use it in GitHub Desktop.
My go to Gunicorn configuration for Django projects.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Gunicorn configuration for production deployment.

This config is optimized for:
- AWS Fargate containers
- Application Load Balancer (ALB) frontend
- Django with database-backed sessions

Usage:
    gunicorn -c gunicorn.conf.py myproject.config.wsgi:application

Environment variables:
    GUNICORN_WORKERS - Number of worker processes (default: 4)
    GUNICORN_THREADS - Threads per worker (default: 8)
    PORT - Port to bind to (default: 8000)

A variable that is unset or blank falls back to its default. A variable that
is set to a non-integer value aborts startup with a ValueError naming it.
"""

import os


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* as an int.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or *default* when there is nothing to parse.

    Raises:
        ValueError: If the variable is set to something that is not an integer.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        # Deployment tooling sometimes exports variables as empty strings;
        # treat that the same as "not configured".
        return default
    try:
        return int(raw)
    except ValueError:
        # Re-raise with the variable name so the failure is easy to trace
        # in container logs.
        raise ValueError(f"{name} must be an integer, got {raw!r}") from None


# -----------------------------------------------------------------------------
# Server Socket
# -----------------------------------------------------------------------------

# The socket to bind.
# Use 0.0.0.0 to accept connections from any IP (required for containers).
bind = f"0.0.0.0:{_env_int('PORT', 8000)}"

# The maximum number of pending connections in the queue.
# High value handles traffic bursts when workers are busy.
# Default is 2048, which is good for high-traffic applications.
backlog = 2048

# -----------------------------------------------------------------------------
# Worker Processes
# -----------------------------------------------------------------------------

# Number of worker processes.
# Rule of thumb: 2-4 workers per CPU core.
# For Fargate, match to vCPU count (e.g., 2 vCPU = 4-8 workers).
workers = _env_int("GUNICORN_WORKERS", 4)

# Worker class to use.
# - "sync": One request per worker (simple, but blocks on I/O)
# - "gthread": Threaded workers (good for I/O-bound Django apps)
# - "gevent": Greenlet-based async (requires gevent package)
#
# gthread is a good balance: handles concurrent requests without
# the complexity of async, works well with Django ORM.
worker_class = "gthread"

# Number of threads per worker (only for gthread worker class).
# Each thread can handle one request, so total capacity = workers × threads.
# Example: 4 workers × 8 threads = 32 concurrent requests.
threads = _env_int("GUNICORN_THREADS", 8)

# -----------------------------------------------------------------------------
# Worker Lifecycle
# -----------------------------------------------------------------------------

# Maximum requests a worker will process before restarting.
# Prevents memory leaks from accumulating. Set to 0 to disable.
# After this many requests, worker is gracefully restarted.
max_requests = 1000

# Random jitter added to max_requests to stagger worker restarts.
# Prevents all workers from restarting simultaneously under load.
# The jitter is a random value in [0, max_requests_jitter] that is ADDED to
# max_requests, so with max_requests=1000 and jitter=100 each worker restarts
# after somewhere between 1000 and 1100 requests.
max_requests_jitter = 100

# Timeout for graceful worker shutdown (seconds).
# When a worker is recycled (max_requests) or during deployment,
# it has this long to finish in-flight requests before being killed.
# Should be >= your longest expected request time.
graceful_timeout = 30

# -----------------------------------------------------------------------------
# Timeouts
# -----------------------------------------------------------------------------

# Worker timeout (seconds).
# If a worker doesn't respond to the arbiter within this time, it's killed.
# Should be longer than your slowest endpoint.
# Note: This is NOT the request timeout - it's the heartbeat timeout.
timeout = 60

# Keep-alive timeout (seconds).
# How long to wait for requests on a keep-alive connection.
#
# CRITICAL FOR ALB: Must be HIGHER than ALB's idle timeout (default 60s).
# If Gunicorn closes the connection before ALB, you get 502 errors
# when ALB tries to reuse a closed connection.
#
# Set to 75s to safely exceed ALB's 60s default.
keepalive = 75

# -----------------------------------------------------------------------------
# Server Mechanics
# -----------------------------------------------------------------------------

# Load app in master process before forking workers.
# Workers inherit the loaded app through copy-on-write, which is also more
# memory efficient.
preload_app = True

# Directory for worker heartbeat files.
# /dev/shm is a RAM-based filesystem, faster than disk.
# Important for containers where disk I/O may be slow.
worker_tmp_dir = "/dev/shm"  # noqa: S108

# Trust proxy-set headers (e.g. X-Forwarded-Proto) from any front-end IP.
# Needed behind a load balancer whose addresses are not known in advance, so
# that the request scheme forwarded by the ALB is honoured.
# "*" disables the front-end IP check entirely; only use it when nothing but
# the load balancer can reach Gunicorn.
# In production, you might restrict this to your ALB's IP range.
forwarded_allow_ips = "*"

# -----------------------------------------------------------------------------
# Logging
# -----------------------------------------------------------------------------

# Log to stdout/stderr for container environments.
# Container orchestrators (ECS, Kubernetes) capture these automatically.
accesslog = "-"
errorlog = "-"

# Access log format.
# Includes response time (%(D)s, in microseconds), which is useful for
# performance monitoring.
access_log_format = (
    '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)sμs'
)

# Log level for error log.
# Options: debug, info, warning, error, critical
loglevel = "info"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment