Skip to content

Instantly share code, notes, and snippets.

@dinvlad
Last active Dec 17, 2021
Embed
What would you like to do?
Retries with exponential backoff and jitter for idempotent background Google Cloud Functions
// One can also use the Google Cloud Firestore library here,
// with a slight change in semantics.
import { firestore } from 'firebase-admin';
import { EventContext, runWith } from 'firebase-functions';
import { promisify } from 'util';
/** Firestore collection holding one journal document per event id. */
const eventCollection = 'function-events';

/** Processing state stored in the `status` field of an event document. */
enum EventStatus {
  /** An invocation has claimed the event and has not yet recorded an outcome. */
  RUNNING = 'running',
  /** The last invocation's handler threw; the event may be claimed again. */
  FAILED = 'failed',
}

/** Name of the function timeout, in seconds; also caps the retry delay. */
export const functionTimeoutSec = 60;

/** Events whose creation timestamp is older than this (ms) are dropped. */
export const maxRetryAgeMs = 2 * 3600 * 1000;

/** Upper bound (ms) of the jittered delay for attempt 0. */
export const baseRetryDelayMs = 1000;

/** Factor by which the delay bound grows with each attempt. */
export const retryDelayFactor = 2;
/**
 * Wraps a background function to enable automatic retries with
 * exponential backoff and jitter
 * (https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter).
 *
 * The wrapped handler **must** be idempotent (safe to retry). You must also
 * make sure it is well-tested and manually enable retries for the deployed
 * function; see https://cloud.google.com/functions/docs/bestpractices/retries.
 *
 * The wrapper also suppresses double-firing for the same event id, which can
 * happen because background functions do not guarantee once-only delivery.
 *
 * Firestore is used as a transactional store for events: a document keyed by
 * the event id is created when the event is first claimed, marked `failed`
 * (with a `reason`) when the handler throws, and deleted when the handler
 * succeeds. Documents that remain therefore act as a journal of failures.
 *
 * @param event   Event payload; a JSON round-tripped copy is stored in Firestore.
 * @param context Event context; `eventId`, `eventType` and `timestamp` are used.
 * @param handler Idempotent function that processes the event.
 * @returns Resolves without running the handler when the event is older than
 *   `maxRetryAgeMs`, is already being processed, or has an unrecognized status;
 *   otherwise resolves once the handler succeeded and the outcome was written.
 * @throws Re-throws whatever the handler threw (after a jittered delay), so
 *   that the platform schedules a retry. Also rejects if the initial claim
 *   transaction fails.
 */
export const backgroundFunction = async <T>(
  event: T,
  { eventId, eventType, timestamp: timeCreated }: EventContext,
  handler: (event: T) => Promise<void>
): Promise<void> => {
  const start = Date.now();
  // Stop retrying events that are too old.
  if (start - Date.parse(timeCreated) > maxRetryAgeMs) {
    return;
  }

  // Point in time after which this invocation should no longer wait or retry.
  const deadline = start + functionTimeoutSec * 1000;
  const sleep = promisify(setTimeout);

  // "Full jitter" delay: uniform in [0, min(exponential bound, time left)],
  // clamped at 0 so an overrun deadline never yields a negative delay.
  const jitteredDelayMs = (attempt: number): number => {
    const ceilingMs = Math.min(
      baseRetryDelayMs * Math.pow(retryDelayFactor, attempt),
      deadline - Date.now()
    );
    return Math.random() * Math.max(0, ceilingMs);
  };

  // Always yields a string, because Firestore rejects `undefined` field
  // values by default and the handler may throw a non-Error value.
  const describeFailure = (thrown: unknown): string =>
    thrown instanceof Error
      ? thrown.stack || thrown.message
      : String(thrown);

  const db = firestore();
  const ref = db.collection(eventCollection).doc(eventId);

  // Atomically claim the event. Resolves to the attempt number when this
  // invocation should run the handler, or to undefined when it should not.
  const attempt = await db.runTransaction(async transaction => {
    const snapshot = await transaction.get(ref);
    const storedAttempt: unknown = snapshot.get('attempt');
    const previousAttempt =
      typeof storedAttempt === 'number' && Number.isFinite(storedAttempt)
        ? storedAttempt
        : 0;
    const status: EventStatus | undefined = snapshot.get('status');
    switch (status) {
      case undefined:
        // First delivery of this event: journal it as running.
        transaction.create(ref, {
          event: JSON.parse(JSON.stringify(event)),
          eventType,
          timeCreated,
          attempt: previousAttempt,
          status: EventStatus.RUNNING,
        });
        return previousAttempt;
      case EventStatus.FAILED: {
        const nextAttempt = previousAttempt + 1;
        transaction.update(ref, {
          attempt: nextAttempt,
          status: EventStatus.RUNNING,
        });
        console.warn(`Retrying '${eventId}' eventId`);
        return nextAttempt;
      }
      case EventStatus.RUNNING:
        // NOTE(review): a document stays 'running' if an invocation dies
        // before recording its outcome; later deliveries are then treated
        // as duplicates. Consider expiring stale 'running' entries.
        console.warn(`Triggered a duplicate '${eventId}' eventId`);
        return undefined;
      default:
        console.error(
          `Unrecognized status '${status}' for '${eventId}' eventId`
        );
        return undefined;
    }
  });
  if (attempt === undefined) {
    return;
  }

  // Keep the thrown value in a wrapper so that falsy values
  // (e.g. `throw undefined`) are still recognized as failures.
  let failure: { error: unknown } | undefined;
  try {
    await handler(event);
  } catch (e) {
    const thrown: unknown = e;
    failure = { error: thrown };
  }

  // Record the error status in Firestore, or clean up on success.
  // Write failures are retried with backoff until the deadline, instead of
  // spinning in a tight loop.
  for (let writeAttempt = 0; ; writeAttempt++) {
    try {
      await db.runTransaction(async transaction => {
        if (failure) {
          transaction.update(ref, {
            status: EventStatus.FAILED,
            reason: describeFailure(failure.error),
          });
        } else {
          transaction.delete(ref);
        }
      });
      break;
    } catch (e) {
      // Treat errors when writing to Firestore as transient.
      console.error(e);
      if (Date.now() >= deadline) {
        console.error(
          `Giving up on recording the outcome for '${eventId}' eventId`
        );
        break;
      }
      await sleep(jitteredDelayMs(writeAttempt));
    }
  }

  // Optionally, check here whether the error is not transient (e.g. a 400/404)
  // and return early **without** re-throwing it (to prevent a retry).
  // ...
  // Otherwise treat the error as transient: wait, then re-throw so the
  // platform retries the event.
  if (failure) {
    await sleep(jitteredDelayMs(attempt));
    throw failure.error;
  }
};
// Example: a Firebase Pub/Sub trigger wrapped with backgroundFunction().
// The function timeout is set to functionTimeoutSec, the same constant
// backgroundFunction() uses to cap its retry delay.
export const helloPubSub = runWith({ timeoutSeconds: functionTimeoutSec })
  .pubsub.topic('topic-name')
  .onPublish((message, ctx) => {
    return backgroundFunction(message, ctx, async ({ json }) => {
      // ... handle message json
    });
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment