Skip to content

Instantly share code, notes, and snippets.

@peterjaap
Last active March 13, 2024 21:57
Show Gist options
  • Star 12 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save peterjaap/006169c5d95eeffde3a1cc062de1b514 to your computer and use it in GitHub Desktop.
Save peterjaap/006169c5d95eeffde3a1cc062de1b514 to your computer and use it in GitHub Desktop.
Updated Magento 2 Varnish 6 VCL, in cooperation with Varnish Software
# A number of these changes come from the following PRs: https://github.com/magento/magento2/pull/29360, https://github.com/magento/magento2/pull/28944, https://github.com/magento/magento2/pull/28894, https://github.com/magento/magento2/pull/35228, https://github.com/magento/magento2/pull/36524 and https://github.com/magento/magento2/pull/34323
# VCL version 5.0 is not supported, so the VCL version stays at 4.x (4.1 here) even though the Varnish version actually used is 6
vcl 4.1;
import std;
# The minimal Varnish version is 6.0
# For SSL offloading, pass the following header in your proxy server or load balancer: '/* {{ ssl_offloaded_header }} */: https'
backend default {
    # Origin server address; host and port are filled in when this template is rendered
    .host = "/* {{ host }} */";
    .port = "/* {{ port }} */";

    # Wait up to 10 minutes for the first byte of a backend response
    .first_byte_timeout = 600s;

    # Health probe: request /health_check.php every 5 seconds with a 2 second
    # timeout; the backend counts as healthy while at least 5 of the last
    # 10 probes succeeded
    .probe = {
        .url = "/health_check.php";
        .interval = 5s;
        .timeout = 2s;
        .threshold = 5;
        .window = 10;
    }
}
# Access control list of client addresses that are allowed to send PURGE
# requests; vcl_recv rejects a PURGE whose client.ip does not match this list.
# The entries are injected at the marker below when this template is rendered.
acl purge {
/* {{ ips }} */
}
sub vcl_recv {
    # Entry point for every client request: normalizes the request (URL, Host,
    # cookies), handles PURGE/ban requests, and decides whether the request is
    # piped, passed to the backend, or looked up in the cache.

    # Remove empty query string parameters
    # e.g.: www.example.com/index.html?
    if (req.url ~ "\?$") {
        set req.url = regsub(req.url, "\?$", "");
    }

    # Remove port number from host header.
    # The pattern is anchored at the end so only a trailing ":port" is removed;
    # an unanchored pattern would also cut into IPv6 literals such as "[::1]".
    set req.http.Host = regsub(req.http.Host, ":[0-9]+$", "");

    # Sorts query string parameters alphabetically for cache normalization purposes
    set req.url = std.querysort(req.url);

    # Remove the proxy header to mitigate the httpoxy vulnerability
    # See https://httpoxy.org/
    unset req.http.proxy;

    # Add X-Forwarded-Proto header when using https
    if (!req.http.X-Forwarded-Proto && (std.port(server.ip) == 443 || std.port(server.ip) == 8443)) {
        set req.http.X-Forwarded-Proto = "https";
    }

    # Reduce grace to the configured setting if the backend is healthy
    # In case of an unhealthy backend, the original grace is used
    if (std.healthy(req.backend_hint)) {
        set req.grace = /* {{ grace }} */s;
    }

    if (req.method == "PURGE") {
        # Only clients listed in the purge ACL may invalidate content
        if (client.ip !~ purge) {
            return (synth(405, "Method not allowed"));
        }
        # To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
        # has been added to the response in your backend server config. This is used, for example, by the
        # capistrano-magento2 gem for purging old content from varnish during its deploy routine.
        if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
            # No ban pattern supplied: purge the single object matching this request
            return (purge);
        }
        if (req.http.X-Magento-Tags-Pattern) {
            ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
        }
        if (req.http.X-Pool) {
            ban("obj.http.X-Pool ~ " + req.http.X-Pool);
        }
        return (synth(200, "Purged"));
    }

    # Anything that is not one of the methods listed here is piped to the backend
    if (req.method != "GET" &&
        req.method != "HEAD" &&
        req.method != "PUT" &&
        req.method != "POST" &&
        req.method != "PATCH" &&
        req.method != "TRACE" &&
        req.method != "OPTIONS" &&
        req.method != "DELETE") {
        return (pipe);
    }

    # We only deal with GET and HEAD by default
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    # Bypass health check requests
    if (req.url ~ "^/(pub/)?(health_check\.php)$") {
        return (pass);
    }

    # Collapse multiple cookie headers into one
    std.collect(req.http.Cookie);

    # Remove all marketing get parameters to minimize the cache objects
    if (req.url ~ "(\?|&)(gad_source|gclid|cx|_kx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=") {
        # Strip each tracking parameter together with its whole value, but only
        # when the name directly follows a "?" or "&". Without that anchor the
        # tail of an unrelated parameter (e.g. "product_origin=us") would be
        # removed as well. The delimiter is kept so neighbouring parameters
        # stay separated.
        set req.url = regsuball(req.url, "(\?|&)(gad_source|gclid|cx|_kx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=[^&]*", "\1");
        # Tidy up the delimiters left behind: collapse "&&" runs, drop an "&"
        # directly after "?", and remove a dangling "?" or "&" at the end
        set req.url = regsuball(req.url, "&&+", "&");
        set req.url = regsub(req.url, "\?&", "?");
        set req.url = regsub(req.url, "[?&]+$", "");
    }

    # Static files caching
    if (req.url ~ "^/(pub/)?(media|static)/") {
        # Static files should not be cached by default
        return (pass);
        # But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
        #unset req.http.Https;
        #unset req.http./* {{ ssl_offloaded_header }} */;
        #unset req.http.Cookie;
    }

    # Don't cache the authenticated GraphQL requests
    if (req.url ~ "/graphql" && req.http.Authorization ~ "^Bearer") {
        return (pass);
    }

    return (hash);
}
sub vcl_hash {
# Adds Magento-specific components to the cache key. There is no return
# statement in this subroutine.
# For non-GraphQL requests, vary the cache on the value of the X-Magento-Vary cookie
if (req.url !~ "/graphql" && req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}
# To make sure http users don't see ssl warning
hash_data(req.http./* {{ ssl_offloaded_header }} */);
/* {{ design_exceptions_code }} */
if (req.url ~ "/graphql") {
if (req.http.X-Magento-Cache-Id) {
hash_data(req.http.X-Magento-Cache-Id);
} else {
# If X-Magento-Cache-Id (which already contains Store & Currency) is not set, use the HTTP headers
hash_data(req.http.Store);
hash_data(req.http.Content-Currency);
}
}
}
sub vcl_backend_response {
    # Post-processes every backend response before it is stored or delivered.

    # Keep expired objects around for three days so stale content can be
    # served while a fresh copy is fetched asynchronously
    set beresp.grace = 3d;

    # Text-based content is parsed for ESI and compressed; JavaScript files
    # (matched by URL suffix) are compressed only
    if (beresp.http.content-type ~ "text") {
        set beresp.do_esi = true;
        set beresp.do_gzip = true;
    } else if (bereq.url ~ "\.js$") {
        set beresp.do_gzip = true;
    }

    # Debug aid: copy the backend's Cache-Control into a separate header
    if (beresp.http.X-Magento-Debug) {
        set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
    }

    # Two cases are marked uncacheable for two minutes and delivered right away:
    #  - any status other than HTTP 200 or HTTP 404
    #  - a GraphQL response whose cache ID differs from the one in the request
    if ((beresp.status != 200 && beresp.status != 404) ||
        (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id)) {
        set beresp.ttl = 120s;
        set beresp.uncacheable = true;
        return (deliver);
    }

    # Cacheable content must not carry Set-Cookie
    # (applies to HTTP GET & HTTP HEAD requests only)
    if (beresp.ttl > 0s) {
        if (bereq.method == "GET" || bereq.method == "HEAD") {
            unset beresp.http.Set-Cookie;
        }
    }
}
sub vcl_deliver {
    # Final touches on the response sent to the client: report the cache
    # status, prevent browser caching of non-static content and strip headers
    # that should not be exposed.

    # Report how the response was produced.
    # NOTE: the former "Grace" response header was dropped. It copied
    # req.http.grace, which nothing in this VCL sets, so it could only echo a
    # header supplied by the client.
    if (obj.uncacheable) {
        set resp.http.X-Magento-Cache-Debug = "UNCACHEABLE";
    } else if (obj.hits > 0) {
        set resp.http.X-Magento-Cache-Debug = "HIT";
    } else {
        set resp.http.X-Magento-Cache-Debug = "MISS";
    }

    # Not letting browser to cache non-static files.
    if (resp.http.Cache-Control !~ "private" && req.url !~ "^/(pub/)?(media|static)/") {
        set resp.http.Pragma = "no-cache";
        set resp.http.Expires = "-1";
        set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, max-age=0";
    }

    # Age is only kept when the backend flagged the response for debugging
    if (!resp.http.X-Magento-Debug) {
        unset resp.http.Age;
    }

    # Remove internal and implementation-revealing headers
    unset resp.http.X-Magento-Debug;
    unset resp.http.X-Magento-Tags;
    unset resp.http.X-Powered-By;
    unset resp.http.Server;
    unset resp.http.X-Varnish;
    unset resp.http.Via;
    unset resp.http.Link;
}
@ThijsFeryn
Copy link

if (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id) {
    set beresp.ttl = 0s;
    set beresp.uncacheable = true;
}

The set beresp.ttl = 0s; should be replaced with set beresp.ttl = 120s;. You never want this value to be zero, because that will result in bypassing the wait list.

@ThijsFeryn
Copy link

ThijsFeryn commented Nov 26, 2022

Another suggestion would be to drop the following block from the VCL:

if (beresp.ttl <= 0s ||
    beresp.http.Surrogate-control ~ "no-store" ||
    (!beresp.http.Surrogate-Control &&
    beresp.http.Cache-Control ~ "no-cache|no-store") ||
    beresp.http.Vary == "*") {
    # Mark as Hit-For-Pass for the next 2 minutes
    set beresp.ttl = 120s;
    set beresp.uncacheable = true;
}

All this code is part of the built-in VCL and should not really be repeated.

This would result in the final return(deliver) in vcl_backend_response being removed as well to ensure that execution of the built-in VCL is continued.

I would also advise to change the following block:

if (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id) {
    set beresp.ttl = 0s;
    set beresp.uncacheable = true;
}

Into the following:

if (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id) {
    set beresp.ttl = 120s;
    set beresp.uncacheable = true;
    return(deliver);
}

@ThijsFeryn
Copy link

@peterjaap I'll continue looking at this and suggest small fixes on Monday

@peterjaap
Copy link
Author

@ThijsFeryn thanks! I updated the gist accordingly.

@peterjaap
Copy link
Author

@ThijsFeryn we also have some rules in some of our VCLs to bypass routes that are never cached in Magento, does that make sense?

     # Bypass customer, shopping cart, checkout, admin
     if (req.url ~ "/customer" || req.url ~ "/checkout" || req.url ~ "/admin") {
        return (pass);
    }

@ThijsFeryn
Copy link

@ThijsFeryn we also have some rules in some of our VCLs to bypass routes that are never cached in Magento, does that make sense?

     # Bypass customer, shopping cart, checkout, admin
     if (req.url ~ "/customer" || req.url ~ "/checkout" || req.url ~ "/admin") {
        return (pass);
    }

If these pages return Cache-Control: private response headers or variations like Cache-Control: no-cache or Cache-Control: no-store, there is no need to explicitly have those rules for /customer, /checkout and /admin in VCL. Varnish will respect the Cache-Control header and bypass these requests from hitting the cache.

However, if the Cache-Control header doesn't return the right value, you need to write VCL.

Long story short: only write VCL in situations where the built-in VCL doesn't provide a native solution.

@ThijsFeryn
Copy link

@peterjaap The Varnish6.vcl file below you diff has the following code:

sub process_graphql_headers {
    if (req.http.X-Magento-Cache-Id) {
        hash_data(req.http.X-Magento-Cache-Id);

        # When the frontend stops sending the auth token, make sure users stop getting results cached for logged-in users
        if (req.http.Authorization ~ "^Bearer") {
            hash_data("Authorized");
        }
    }

    hash_data(req.http.Store);

    hash_data(req.http.Content-Currency);
}

This code implies that authorized /graphql calls can be cached in Magento. Is that the desired behavior? I was under the impression that authorized /graphql calls should bypass the cache.

Please clarify.

@peterjaap
Copy link
Author

peterjaap commented Nov 29, 2022

@ThijsFeryn from the Magento 2 docs;

Adobe Commerce and Magento Open Source can cache pages rendered from the results of certain GraphQL queries with full-page caching. Full-page caching improves response time and reduces the load on the server. Without caching, each page might need to run blocks of code and retrieve large amounts of information from the database. Only queries submitted with an HTTP GET operation can be cached. POST queries cannot be cached.

Indeed, I feel Authorized graphql calls should bypass the cache.

@JeroenBoersma
Copy link

Further optimization we do is skipping Varnish fully for certain requests..
Varnish doesn't support SSL so because we hit nginx for static files and immediately serve them by Nginx...

For instance: /static are only files which can be served with nginx

location /media/customer/ { deny all; }
location /media/import/   { deny all; }
location /magento_version { deny all; }

location /static {
  expires max;

  # Remove signature of the static files that is used to overcome the browser cache
  location ~ ^/static/version {
    rewrite ^/static/(version\d*/)?(.*)$ /static/$2 last;
  }

  add_header Cache-Control "public";
  add_header X-Frame-Options "SAMEORIGIN";

  allow all;
}

@JeroenBoersma
Copy link

@ThijsFeryn
Copy link

ThijsFeryn commented Nov 29, 2022

Further optimization we do is skipping Varnish fully for certain requests.. Varnish doesn't support SSL so because we hit nginx for static files and immediately serve them by Nginx...

For instance: /static are only files which can be served with nginx

location /media/customer/ { deny all; }
location /media/import/   { deny all; }
location /magento_version { deny all; }

location /static {
  expires max;

  # Remove signature of the static files that is used to overcome the browser cache
  location ~ ^/static/version {
    rewrite ^/static/(version\d*/)?(.*)$ /static/$2 last;
  }

  add_header Cache-Control "public";
  add_header X-Frame-Options "SAMEORIGIN";

  allow all;
}

@JeroenBoersma
But that is if you use Nginx as a TLS proxy. I actually advise using Hitch as a TLS proxy:

  • It's developed by Varnish Software
  • It's open source
  • It's a pure TLS proxy that has no HTTP awareness
  • It supports both PROXY protocol version 1 & 2

See https://www.varnish-software.com/developers/tutorials/terminate-tls-varnish-hitch/ for more information about Hitch and Varnish.

@convenient
Copy link

@peterjaap Here's the notes I added for my team, with pwa studio references for usage


Seems magento 2.4.4 and above, and latest PWA studio have improved cacheability for GQL requests
https://devdocs.magento.com/guides/v2.4/graphql/caching.html#customers

Magento generates a x-cache-id variation - https://github.com/magento/magento2/blob/192b28038dac2e993c65b49fb8b5af10cb520945/app/code/Magento/PageCache/etc/varnish6.vcl#L116-L148

PWA studio constantly grabs it from the response, and updates it to attach to the next requests so that when you log in etc you change customer groups / get cacheability
magento/pwa-studio@626ea1f#diff-b638977b4ea6fd3f29b668736551ed752c5b3915805df1af2f1a7579a006aacbR27

https://github.com/magento/pwa-studio/blob/626ea1f1c384654703501783b219bc7c4efa672d/packages/peregrine/lib/Apollo/links/gqlCacheLink.js#L41

@IvanChepurnyi
Copy link

IvanChepurnyi commented Nov 29, 2022

@peterjaap The Varnish6.vcl file below you diff has the following code:

sub process_graphql_headers {
    if (req.http.X-Magento-Cache-Id) {
        hash_data(req.http.X-Magento-Cache-Id);

        # When the frontend stops sending the auth token, make sure users stop getting results cached for logged-in users
        if (req.http.Authorization ~ "^Bearer") {
            hash_data("Authorized");
        }
    }

    hash_data(req.http.Store);

    hash_data(req.http.Content-Currency);
}

This code implies that authorized /graphql calls can be cached in Magento. Is that the desired behavior? I was under the impression that authorized /graphql calls should bypass the cache.

Please clarify.

This specific example implies that caching happens only when X-Magento-Cache-Id is set then, it additionally may add Authorization to it. Although, by default, GraphQL API allows doing queries without Authorization for stuff like catalog, so definitely looks strange.

Also, Authorization makes the cache user-specific, which removes any benefit of caching it in the Varnish server side, as if you have a spike of 1000 users, each of which will generate 1000 variations of the same cache.
I would probably remove the Authorization header from the request in vcl_recv if X-Magento-Cache-Id is specified, to prevent caching of customer-sensitive data in the first place. As a side effect, it should increase the cache hit ratio, as I imagine Authorization is added automatically if the request is sent when the user gets logged in. But that is implementation-specific to the frontend that uses GraphQL, which should always separate customer-specific queries from cacheable ones.

@JeroenBoersma
Copy link

@JeroenBoersma But that is if you use Nginx as a TLS proxy. I actually advise using Hitch as a TLS proxy:

  • It's developed by Varnish Software
  • It's open source
  • It's a pure TLS proxy that has no HTTP awareness
  • It supports both PROXY protocol version 1 & 2

See https://www.varnish-software.com/developers/tutorials/terminate-tls-varnish-hitch/ for more information about Hitch and Varnish.

@ThijsFeryn If it were just SSL termination, then that would be a fair point. In this case, however, it's a combined effort for file serving and (generated) content: you get a performance gain by serving the files directly from the server and using Varnish for the generated content. There is no need to cache these static files on the same server in Varnish.

@JeroenBoersma
Copy link

rewrite /pub/health_check.php /health_check.php;

@JeroenBoersma
Copy link

A customer notified us with a list of more marketing pixels....

I think we can include these in the list of parameters to...
https://maxchadwick.xyz/tracking-query-params-registry/ props to @mpchadwick

Source which links to the above list: https://www.rumvision.com/blog/make-sure-to-check-your-caching-rules/

@ThijsFeryn
Copy link

A customer notified us with a list of more marketing pixels....

I think we can include these in the list of parameters to... https://maxchadwick.xyz/tracking-query-params-registry/ props to @mpchadwick

Source which links to the above list: https://www.rumvision.com/blog/make-sure-to-check-your-caching-rules/

@JeroenBoersma: here's the updated VCL:

    if (req.url ~ "(\?|&)(_branch_match_id|_bta_[a-z]+|campid|customid|_ga|gclid|gclsrc|gdf[a-z]+|cx|dm_i|ef_id|epik|ie|igshid|cof|hsa_[a-z]+|_ke|mk[a-z]{3}|msclkid|(mtm|matomo)_[a-z]+|pcrid|p(iwi)?k_[a-z]+|redirect(_log)?_mongo_id|siteurl|s_kwcid|sb_referer_host|si|trk_[a-z]+|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=") {
        set req.url = regsuball(req.url, "(_branch_match_id|_bta_[a-z]+|campid|customid|_ga|gclid|gclsrc|cx|dm_i|ef_id|epik|ie|igshid|cof|hsa_[a-z]+|_ke|mk[a-z]{3}|msclkid|(mtm|matomo)_[a-z]+|pcrid|p(iwi)?k_[a-z]+|redirect(_log)?_mongo_id|siteurl|s_kwcid|sb_referer_host|si|trk_[a-z]+|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=[-_A-z0-9+()%.]+&?", "");
        set req.url = regsub(req.url, "[?|&]+$", "");
    }

@damienwebdev
Copy link

@peterjaap and @ThijsFeryn magento/magento2#37524

These marketing pixels should be ignored when hitting graphql; otherwise you'll end up regsub-ing way more than you expect. Sometimes, likely unexpectedly, there's a relative URL (with query params) hidden inside the URL.

@chrisastley
Copy link

You have an error on line 44, the close bracket before the || needs removing.

@peterjaap
Copy link
Author

@chrisastley thanks!

@peterjaap
Copy link
Author

@glo11372
Copy link

glo11372 commented Feb 27, 2024

sub process_graphql_headers {
if (req.http.X-Magento-Cache-Id) {
hash_data(req.http.X-Magento-Cache-Id);

    # When the frontend stops sending the auth token, make sure users stop getting results cached for logged-in users
    if (req.http.Authorization ~ "^Bearer") {
        hash_data("Authorized");
    }
}

hash_data(req.http.Store);

hash_data(req.http.Content-Currency);

}
Can we remove Authorization token from frontend as ‘X-Magento-Cache-Id’ already there which provide Authorization so no need of bearer token?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment