Skip to content

Instantly share code, notes, and snippets.

@Integralist
Last active November 15, 2021 13:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Integralist/f7a6abdd946ad5b3b06907069f79cc48 to your computer and use it in GitHub Desktop.
Save Integralist/f7a6abdd946ad5b3b06907069f79cc48 to your computer and use it in GitHub Desktop.
[Fastly Varnish Serve Stale Testing] #fastly #varnish #vcl #cdn #cache #stale #stale-while-revalidate #stale-if-error
include "serve_stale_verification"
# Fetch-phase hook: apply the serve-stale test overrides first, then fall back
# to a stale object whenever the (possibly overridden) backend status is a 5xx.
sub vcl_fetch {
#FASTLY fetch
  ...
  # must run before the stale.exists check below, because it may rewrite
  # beresp.status and the stale-related TTLs.
  call serve_stale_verification;

  # any 5xx from the backend: prefer a stale cached object if one exists.
  if (beresp.status >= 500 && beresp.status < 600 && stale.exists) {
    return(deliver_stale);
  }
  ...
  return(deliver);
}
# Test hook driven by the X-ModifyDirectives request header.
# Call this subroutine _before_ any checks for stale.exists.
sub serve_stale_verification {
  if (req.http.X-ModifyDirectives == "shorten") {
    # shorten every lifetime so the serve-stale behaviour can be observed
    # within a short test run.
    set beresp.ttl = 10s;
    set beresp.stale_while_revalidate = 20s;
    set beresp.stale_if_error = 30s;

    # A '304 Not Modified' from origin will not cause vcl_fetch to be
    # executed. To prevent that scenario from occurring (and confusing the
    # serve-stale expectations) strip the validators, so the cached object
    # has no ETag or Last-Modified to compare against and vcl_fetch should
    # always execute.
    unset beresp.http.ETag;
    unset beresp.http.Last-Modified;
  } else if (req.http.X-ModifyDirectives == "fail") {
    # make the origin response look like a failure and keep it out of cache.
    set beresp.status = 500;
    set beresp.cacheable = false;
  }
}
""" Run this script using: time poetry run python verify_stale/stale.py """
# standard library modules
import os
import re
import sys
import time
from datetime import datetime
# third-party modules
from tornado.gen import coroutine
from tornado.httpclient import AsyncHTTPClient, HTTPResponse
from tornado.ioloop import IOLoop
from tornado.simple_httpclient import HTTPTimeoutError
# Every request made through the shared client identifies itself with a
# custom user agent.
AsyncHTTPClient.configure(None, defaults={"user_agent": "IntegralistTesting"})
# Shared client instance used by get_urls().
http_client = AsyncHTTPClient()
def get_urls(cachebust: float = 0.0, fail: bool = False) -> list:
    """Start concurrent requests for the list of test URLs.

    This example code presumes a staging environment is used and that this
    staging environment is protected with BasicAuth. The credentials are read
    from the AUTH_USER and AUTH_PASS environment variables and handed to the
    HTTP client as request options rather than being embedded in the URL, so
    they are not echoed by the `print` below and do not need URL-escaping.

    Args:
        cachebust: value appended as the `cachebust` query parameter.
        fail: when True the `X-ModifyDirectives` request header is set to
            "fail", otherwise to "shorten".

    Returns:
        A list with one Future per path (in `paths` order), as returned by
        `http_client.fetch`. Nothing is awaited here; the caller is expected
        to yield the whole list.

    Raises:
        KeyError: if AUTH_USER or AUTH_PASS is missing from the environment.
    """
    user = os.environ["AUTH_USER"]
    password = os.environ["AUTH_PASS"]

    host = "stage.example.com"
    query = f"?cachebust={cachebust}"

    paths = [
        "/foo",
        "/bar",
        "/baz",
    ]

    state = "fail" if fail else "shorten"
    headers = {"X-BF-Debug": "1", "X-ModifyDirectives": state}

    responses = []
    for path in paths:
        url = f"https://{host}{path}{query}"
        print(f"request: {url}\n\t{headers}")

        responses.append(
            # we don't need to wait on these 'fetch' calls because the
            # function returns a Future, so we collect a group of futures
            # and return those to the caller.
            #
            # tornado is then able to 'yield' multiple Futures within a list.
            http_client.fetch(
                url,
                headers=headers,
                auth_username=user,
                auth_password=password,
                raise_error=False,
            )
        )

    return responses
@coroutine
def validate(
    timestamp: float, expected: str, then_sleep: int = 0, fail: bool = False
):
    """Request every test URL and check each response's Fastly-State header.

    A banner describing the current step is printed first; the map `m`
    translates the `then_sleep` value into that description, purely to make
    the output easier to follow when a validation check fails.

    After all responses have been checked the program sleeps for `then_sleep`
    seconds using a blocking `time.sleep`, so the next batch of requests is
    held back for that period. For example, with `then_sleep` set to 5 we
    request the pages and then stop the program for five seconds; the next
    batch of requests should then yield cache HITs because the TTL of our
    cached objects is set to 10s.

    Remember: in the CDN/VCL a request header controls the caching directives
    and whether the origin response should look like a failure:

        sub serve_stale_verification {
          if (req.http.X-ModifyDirectives == "shorten") {
            set beresp.ttl = 10s;
            set beresp.stale_while_revalidate = 20s;
            set beresp.stale_if_error = 30s;
            unset beresp.http.ETag;
            unset beresp.http.Last-Modified;
          }
          if (req.http.X-ModifyDirectives == "fail") {
            set beresp.status = 500;
            set beresp.cacheable = false;
          }
        }

    Args:
        timestamp: cache-busting value forwarded to `get_urls`.
        expected: regular expression the Fastly-State header must match
            (searched case-insensitively).
        then_sleep: seconds to sleep once every response has been checked.
        fail: forwarded to `get_urls`; asks the CDN to turn the origin
            response into a 500.

    Exits the process with status 1 if a request times out.
    """
    m = {
        5: ">> make requests, expect misses (as fresh content cached for first time), then sleep for 5s",  # noqa
        6: ">> make requests, expect hits (as same content requested), then sleep for 6s",  # noqa
        32: ">> make requests, expect stale-while-revalidate (as max-age ttl expired), cache object updated and ttls reset, then sleep for 32s",  # noqa
        35: ">> make requests, expect stale-if-error (as max-age+stale-while-revalidate ttls expired), then sleep for 35s",  # noqa
        0: ">> make requests, expect misses (as stale-if-error ttl expired)",  # noqa
    }
    # fall back to a generic banner so an unlisted sleep value cannot crash
    # the run with a KeyError.
    banner = m.get(then_sleep, f">> make requests, then sleep for {then_sleep}s")
    print(f"\n\n{banner}\n\n")

    filter_headers = r"^(Age|Date|Etag|Fastly)"

    try:
        # yielding the list of futures resolves them all; keeping the result
        # as a list means any number of URLs is handled.
        responses = yield get_urls(cachebust=timestamp, fail=fail)
    except HTTPTimeoutError as err:
        print(f"\n\ntornado HTTPTimeoutError: {err}\n\n")
        sys.exit(1)

    for response in responses:
        url = response.effective_url
        details = "\n\t".join(
            f"{name}: {value}"
            for name, value in response.headers.items()
            if re.search(filter_headers, name)
        )

        # the header lookup is the only step that can raise KeyError, so it
        # is the only statement guarded.
        try:
            state = response.headers["Fastly-State"]
        except KeyError as err:
            print("\nvalidation check failed: KeyError!")
            print(
                f"convert response to 500: {fail}\n\t{url}\n\tKeyError: {err}\n"  # noqa
            )
            continue

        if re.search(expected, state, flags=re.IGNORECASE):
            print(f"\n√ no validation errors\n\t{url}\n\t{details}")
        else:
            print("\nvalidation check failed")
            print(
                f"convert response to 500: {fail}\n\t{url}\n\t{state} != {expected}\n\n\t{details}"  # noqa
            )

    # deliberately blocking: nothing else should happen until the sleep ends.
    time.sleep(then_sleep)
@coroutine
def process_urls():
    """Run the full serve-stale verification sequence.

    Each step requests the same cache-busted URLs via `validate`, checks the
    reported cache state, and then sleeps long enough to move the cached
    objects into the next phase of their lifetime.

    Because we're using tornado's own `@coroutine` syntax instead of the
    native async/await, we need to `yield` and ensure any async functions we
    call are also decorated with `@coroutine`.
    """
    ts = datetime.now().timestamp()

    miss = r"^MISS(?:-CLUSTER)?"
    # the negative lookahead stops a HIT-STALE state from being accepted
    # where a fresh hit is expected.
    hit = r"^HIT(?!-STALE)(?:-CLUSTER)?"
    stale = r"^HIT-STALE(?:-CLUSTER)?"

    # fresh content cached for the first time
    yield validate(ts, expected=miss, then_sleep=5)

    # expect hits from recently cached resources
    yield validate(ts, expected=hit, then_sleep=6)

    # at this point we would have slept for a total of 11 seconds, meaning our
    # max-age should have expired and we should now serve
    # stale-while-revalidate.
    #
    # we now find that although we've been served stale content, a new request
    # to the origin has been made and the object in the cache has been reset
    # using the cache control headers from the original request (which in our
    # case is the shortened directives that we tweaked in our vcl override).
    #
    # because our origins are setting an ETag, it means we'd normally get a
    # '304 Not Modified' response from origin, and thus vcl_fetch would not get
    # executed. to avoid that scenario in vcl_fetch when we first tweak the
    # cache control directives we also strip ETag/Last-Modified from the
    # origin's response so that there can be no 'conditional request' made and
    # thus we have to do a full request to the origin.
    #
    # the updated object means we shouldn't try to sleep for 21s (which is the
    # stale-while-revalidate TTL) but sleep for 31s (which is the combination
    # of the max-age and stale-while-revalidate TTLs).
    #
    # we actually sleep for 32s just to give a bit of extra padding.
    yield validate(ts, expected=stale, then_sleep=32)

    # stale-while-revalidate expired, so serve stale-if-error
    #
    # note: at this point the age of the object is 32 and so we have 8 seconds
    # before stale-if-error TTL expires.
    yield validate(ts, fail=True, expected=stale, then_sleep=35)

    # stale-if-error expired, so expect cache misses again
    yield validate(ts, expected=miss)
# Script entry point: run the process_urls coroutine on the current IOLoop.
io_loop = IOLoop.current()
io_loop.run_sync(process_urls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment