Skip to content

Instantly share code, notes, and snippets.

@sperand-io
Last active October 28, 2023 09:41
Show Gist options
  • Star 12 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save sperand-io/fc211731e31c628232abb998febb077b to your computer and use it in GitHub Desktop.
Save sperand-io/fc211731e31c628232abb998febb077b to your computer and use it in GitHub Desktop.
Cloudflare Workers / Segment Smart Proxy — serve data collection assets and endpoints from your own domain
/**
* Steps to use:
* 1. Create CF Worker, copy and paste this in
* 2. (Optional) Update configuration defaults
* - If you want to manage in code, do so below under "Static Configuration"
* - If you want dynamic custom config: Create CFW KV namespace, link them, and add reference below
*
* - You can overwrite default path prefix for loading analytics.js (<yourdomain>/ajs)
* (corresponding KV entry: `script_path_prefix`)
* - You can overwrite default path prefix for handling first-party data collection (<yourdomain>/data)
* (corresponding KV entry: `collection_api_path_prefix`)
* - You can overwrite default cookie name for the edge-side anonymous ID
* (corresponding KV entry: `cookie_name`)
* - You can overwrite default integration list path prefix (<yourdomain>/ilist)
* (corresponding KV entry: `integration_list_path_prefix`)
* - You can overwrite the default refresh trigger if you want to more regularly update the anonymousId
* (corresponding KV entry: `refresh_threshold`)
* - You can set a path for echoing the anonymous session ID (disabled when empty)
* (corresponding KV entry: `anonymous_id_echo_path`)
* - You can set a default write key if you just want to use one globally and want to omit it from your site code
* (corresponding KV entry: `default_write_key`)
* - You can set an error collection endpoint if you have a logging service that accepts webhooks
* (corresponding KV entry: `error_endpoint`)
*
* 3. (If needed) If you use it for Consent Management, update any conditional destination loading logic to pull the integration list from your host + integration list path prefix
* eg. If using Segment Consent Manager or https://gist.github.com/sperand-io/4725e248a35d5005d68d810d8a8f7b29
* ...instead of fetch(`https://cdn.segment.com/v1/projects/${writeKey}/integrations`)
* ...replace with fetch(`${location.origin}/ilist/${writeKey}`) or, if you have a default write key set, fetch(`${location.origin}/ilist/`)
* 4. (REQUIRED) Deploy and configure the worker to serve for your desired domain/subdomain and at your desired path
* 5. (REQUIRED) Update your segment snippet to load from your host + script path prefix
* (eg find n.src="https://cdn.segment.com/analytics.js/v1/"+t+"/analytics.min.js" in snippet and ...)
* (replace with n.src=`${location.origin}/ajs` if you have a default write key set)
* (or with n.src=`${location.origin}/ajs/${t}` if not)
*/
// Optional Workers KV namespace holding configuration overrides.
// It is declared but never assigned in this file, so as written handleEvent
// uses STATIC_CONFIG directly; point it at a bound KV namespace to enable
// the KV lookups in handleEvent (via hydrateConfig) and handleErr.
let KV_NAMESPACE
// START STATIC CONFIGURATION
// Defaults used whenever a value is not supplied through KV.
// NOTE(review): DEFAULT_WRITE_KEY and ERROR_ENDPOINT look like the original
// author's sample values — replace them before deploying.
const STATIC_CONFIG = {
// Name of the edge-side anonymous ID cookie; a companion `<name>_set` cookie stores its expiry.
COOKIE_NAME: '__anonymous_session_id',
// Requests whose path begins with /<prefix> are served the rewritten analytics.js.
SCRIPT_PATH_PREFIX: 'ajs',
// Requests whose path begins with /<prefix> are forwarded to the Segment tracking API.
COLLECTION_API_PATH_PREFIX: 'data',
// Requests whose path begins with /<prefix> are forwarded to the Segment integration list.
INTEGRATION_LIST_PATH_PREFIX: 'ilist',
// Path prefix for the anonymousId echo route; the route is skipped while this is empty.
ANONYMOUS_ID_ECHO_PATH: '',
// Number of days before expiry at which the anonymous ID cookie is re-issued.
REFRESH_THRESHOLD: 45,
// Write key used when the request path does not carry one.
DEFAULT_WRITE_KEY: '3K4xZlUgQFAa3MRdnRRKvbvDEukDCWeu',
// When set, handleErr POSTs a JSON error report to this URL.
ERROR_ENDPOINT: 'https://enj0zt42hq1y.x.pipedream.net'
}
// END STATIC CONFIGURATION. Editing below this line is discouraged.
/**
 * Register the worker's fetch listener.
 *
 * Every incoming request is answered with whatever handleErr resolves to.
 */
addEventListener('fetch', function onFetch(event) {
  event.respondWith(handleErr(event))
})
/**
 * Top-level event handler.
 *
 * Delegates to handleEvent and converts any thrown error into an HTTP
 * response. When an error endpoint is configured (KV entry `error_endpoint`
 * first, then STATIC_CONFIG.ERROR_ENDPOINT) the error is also shipped there
 * in the background via event.waitUntil.
 *
 * @param {Event} event the fetch event
 * @returns {Promise<Response>} the handler's response, or an error response
 *   carrying err.message (status err.statusCode, defaulting to 500)
 */
async function handleErr(event) {
  try {
    return await handleEvent(event)
  } catch (err) {
    const fromKV = KV_NAMESPACE && (await KV_NAMESPACE.get('error_endpoint'))
    const endpoint = fromKV ? fromKV : STATIC_CONFIG['ERROR_ENDPOINT']
    if (endpoint) {
      // Reporting must not delay the response to the client.
      event.waitUntil(log(endpoint, err, event.request))
    }
    const body = err.message || 'An error occurred!'
    const status = err.statusCode || 500
    return new Response(body, { status })
  }
}
/**
 * Respond to the request.
 *
 * Routes requests under the configured (or default) path prefixes to the
 * Segment-specific handlers and passes everything else through unchanged.
 * Routes are tried in order: script, data collection, integration list,
 * then (only when an echo path is configured) the anonymousId echo.
 *
 * @param {Event} event the fetch event
 * @returns {Promise<Response>}
 */
async function handleEvent(event) {
  let config = STATIC_CONFIG
  if (KV_NAMESPACE) config = await hydrateConfig(KV_NAMESPACE)

  const cache = caches.default
  const request = event.request
  const url = new URL(request.url)

  // Cookie information is extracted once and shared by the handlers.
  const cookieData = getCookieData(request, config.COOKIE_NAME)

  // Each route pairs a path prefix with a lazily-invoked handler.
  const routes = [
    // analytics.js
    [
      config.SCRIPT_PATH_PREFIX,
      () => handleScript(event, cache, cookieData, config)
    ],
    // first-party data collection pings
    [
      config.COLLECTION_API_PATH_PREFIX,
      () => handleDataCollection(request, cookieData, config)
    ],
    // first-party integration list
    [
      config.INTEGRATION_LIST_PATH_PREFIX,
      () => handleIntegrationListing(request, config)
    ]
  ]

  // The echo route only exists when a path has been configured for it.
  const echoPath = config.ANONYMOUS_ID_ECHO_PATH
  if (echoPath) {
    routes.push([echoPath, () => handleEcho(event, cookieData)])
  }

  for (const [prefix, respond] of routes) {
    if (startsWith(url, prefix)) return await respond()
  }

  // Anything else is passed through untouched.
  return await fetch(request)
}
/**
 * Serve analytics.js
 *
 * Serves a modified analytics.js for the (default || passed) writeKey at the
 * (default || configured) path prefix.
 * Mods:
 * - If the writeKey is omitted from the path, the default write key is used
 * - The data collection API host inside the script is pointed at this host
 * - If needed, sets an HttpOnly anonymous session cookie (and the
 *   corresponding `<name>_set` cookie recording its expiry)
 *
 * Upstream responses that are not successful (including 304 revalidations)
 * are returned as-is: they are neither rewritten nor cached.
 *
 * @param {Event} event the fetch event (provides request and waitUntil)
 * @param {Cache} cache cache used to store the rewritten script
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId current edge-side anonymous ID, if any
 * @param {Date} cookieData.expires expiry recorded for that ID, if any
 * @param {Object} config
 * @returns {Promise<Response>}
 */
async function handleScript(
  event,
  cache,
  { anonymousId, expires },
  {
    SCRIPT_PATH_PREFIX,
    DEFAULT_WRITE_KEY,
    COLLECTION_API_PATH_PREFIX,
    COOKIE_NAME,
    REFRESH_THRESHOLD
  }
) {
  const { request } = event
  const { pathname, hostname } = new URL(request.url)
  let [, writeKey] = pathname.split(`/${SCRIPT_PATH_PREFIX}/`)
  if (!writeKey) writeKey = DEFAULT_WRITE_KEY

  let response
  const cached = await cache.match(request)
  if (cached) {
    // Responses handed back by the cache have immutable headers; re-wrap so
    // the Set-Cookie headers below can be appended without throwing.
    response = new Response(cached.body, cached)
  } else {
    const endpoint = `https://cdn.segment.com/analytics.js/v1/${writeKey}/analytics.min.js`
    const originalResponse = await fetch(new Request(endpoint, request))
    // Errors and 304s must not be rewritten or cached; a null-body status
    // would also make the Response constructor below throw.
    if (!originalResponse.ok) return originalResponse
    const analyticsjs = await originalResponse.text()
    const modifiedAnalyticsjs = analyticsjs.replace(
      /api\.segment\.io\/v1/g,
      `${hostname}/${COLLECTION_API_PATH_PREFIX}`
    )
    // Only status, statusText and headers are read from the init argument.
    response = new Response(modifiedAnalyticsjs, originalResponse)
    // Cache the cookie-free copy: the clone is taken before cookies are added.
    event.waitUntil(cache.put(request, response.clone()))
  }

  if (!anonymousId || expiresSoon(expires, REFRESH_THRESHOLD)) {
    const oneYearFromNow = new Date()
    oneYearFromNow.setFullYear(oneYearFromNow.getFullYear() + 1)
    response.headers.append(
      'Set-Cookie',
      createCookie(COOKIE_NAME, uuid(), oneYearFromNow)
    )
    response.headers.append(
      'Set-Cookie',
      createCookie(
        `${COOKIE_NAME}_set`,
        oneYearFromNow.toUTCString(),
        oneYearFromNow
      )
    )
  }
  return response
}
/**
 * Serve first party data collection API
 *
 * Modifies and forwards tracking events to Segment (api.segment.io) for
 * requests under the default || configured path prefix.
 * Mods:
 * - If an edge-side anonymous ID is present in the request cookie, it
 *   overwrites the payload's anonymousId
 * - The leading `/<prefix>` of the path is swapped for `/v1`
 * - The Origin header is set to this host
 *
 * @param {Request} request incoming Request with a JSON body
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId edge-side anonymous ID, if any
 * @param {Object} config
 * @returns {Promise<Response>} Segment's response
 */
async function handleDataCollection(
  request,
  { anonymousId },
  { COLLECTION_API_PATH_PREFIX }
) {
  const body = JSON.stringify({
    ...(await request.json()),
    ...(anonymousId ? { anonymousId } : {})
  })
  const { pathname, hostname } = new URL(request.url)
  // Replace only the leading prefix segment, never a later occurrence of
  // the same text elsewhere in the path.
  const correctPath = pathname.replace(`/${COLLECTION_API_PATH_PREFIX}`, '/v1')

  const headers = new Headers(request.headers)
  // set, not append: browsers already send Origin on POST and appending
  // would yield an invalid two-value header.
  headers.set('origin', `https://${hostname}`)
  // The body was re-serialised, so the incoming length may no longer match.
  headers.delete('content-length')

  const newRequest = new Request(`https://api.segment.io${correctPath}`, {
    method: request.method,
    headers,
    redirect: request.redirect,
    body
  })
  return await fetch(newRequest)
}
/**
 * Serve first party integration list API
 *
 * Passes list requests through to the Segment CDN for the default || passed
 * writeKey at the default || configured path prefix.
 *
 * @param {Request} request incoming Request
 * @param {Object} config
 * @param {String} config.INTEGRATION_LIST_PATH_PREFIX path prefix of this route
 * @param {String} config.DEFAULT_WRITE_KEY write key used when the path has none
 * @returns {Promise<Response>} the CDN's response
 */
async function handleIntegrationListing(
  request,
  { INTEGRATION_LIST_PATH_PREFIX, DEFAULT_WRITE_KEY }
) {
  const { pathname } = new URL(request.url)
  let [, writeKey] = pathname.split(`/${INTEGRATION_LIST_PATH_PREFIX}/`)
  if (!writeKey) writeKey = DEFAULT_WRITE_KEY
  const endpoint = `https://cdn.segment.com/v1/projects/${writeKey}/integrations`
  // Reuse the incoming request's method and headers against the CDN URL.
  return await fetch(new Request(endpoint, request))
}
/**
 * Serve first party anonymousID echo API
 *
 * @param {Request} request unused; kept so the signature matches the other handlers
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId edge-side anonymous ID, if any
 * @returns {Promise<Response>} JSON `{ anonymousId }` when the ID is known,
 *   otherwise a 404 with the text "No AnonymousId"
 */
async function handleEcho(request, { anonymousId }) {
  if (!anonymousId) {
    return new Response('No AnonymousId', { status: 404 })
  }
  return new Response(JSON.stringify({ anonymousId }), {
    headers: new Headers({
      'Content-Type': 'application/json'
    })
  })
}
/**
* HELPERS
*/
/**
 * Build the effective configuration from KV overrides and static defaults.
 *
 * Every known key is read from the KV namespace in parallel; any non-empty
 * value replaces the corresponding STATIC_CONFIG entry (KV key `foo_bar`
 * maps to config key `FOO_BAR`). STATIC_CONFIG itself is never modified.
 *
 * @param {NAMESPACE} KV Workers KV namespace holding the overrides
 * @returns {Promise<Object>} a fresh config object
 */
async function hydrateConfig(KV) {
  const keys = [
    'cookie_name',
    'script_path_prefix',
    'collection_api_path_prefix',
    'integration_list_path_prefix',
    'anonymous_id_echo_path',
    'refresh_threshold',
    'default_write_key'
  ]
  const entries = await Promise.all(
    keys.map(async k => [k.toUpperCase(), await KV.get(k)])
  )
  return entries.reduce(
    (config, [key, storedValue]) => {
      if (!storedValue) return config
      if (key === 'REFRESH_THRESHOLD') {
        // KV values are strings; the threshold is used in date arithmetic.
        const days = Number(storedValue)
        if (Number.isFinite(days)) config[key] = days
      } else {
        config[key] = storedValue
      }
      return config
    },
    // Copy so that overrides never leak into the shared defaults.
    { ...STATIC_CONFIG }
  )
}
/**
 * Check if the url path falls under a specified prefix.
 *
 * The prefix must match whole path segments: prefix `data` matches `/data`
 * and `/data/t`, but not `/database`. Trailing slashes on the prefix are
 * ignored.
 *
 * @param {URL} url parsed request URL (only `pathname` is read)
 * @param {string} prefix path prefix without the leading slash
 * @returns {boolean}
 */
function startsWith(url, prefix) {
  const base = `/${prefix}`.replace(/\/+$/, '')
  const { pathname } = url
  return pathname === base || pathname.startsWith(`${base}/`)
}
/**
 * Check if the anonymousId is due to be refreshed
 * (ie. is our expiration closer than our threshold window allows?)
 *
 * A missing or unparseable expiry counts as "expires soon" so that a fresh
 * cookie gets issued.
 *
 * @param {Date} when recorded expiry of the current cookie
 * @param {number|string} REFRESH_THRESHOLD window in days; numeric strings
 *   (as returned by KV) are accepted
 * @returns {boolean}
 */
function expiresSoon(when, REFRESH_THRESHOLD) {
  const expiry = when == null ? NaN : new Date(when).getTime()
  if (Number.isNaN(expiry)) return true

  // Coerce explicitly: adding a string to getDate() would concatenate.
  const days = Number(REFRESH_THRESHOLD)
  // eg. 45 days from now
  const threshold = new Date()
  threshold.setDate(threshold.getDate() + (Number.isFinite(days) ? days : 0))
  // is expiration in less than eg. 45 days?
  return expiry < threshold.getTime()
}
/**
 * Encode a cookie string suited for our use case
 *
 * Name and value are URI-encoded. The cookie is scoped to the whole site
 * (`Path=/`): without an explicit path, a cookie set while serving
 * `/<script prefix>/<writeKey>` would default to that directory and never be
 * sent along with data collection requests.
 *
 * @param {string} name cookie name
 * @param {string} value cookie value
 * @param {Date} expires expiry date
 * @returns {string} value for a Set-Cookie header
 */
function createCookie(name, value, expires) {
  const attributes = [
    `${encodeURIComponent(name)}=${encodeURIComponent(value)}`,
    `Expires=${expires.toUTCString()}`,
    'Path=/',
    'SameSite=Strict',
    'Secure',
    'HttpOnly'
  ]
  return attributes.join('; ')
}
/**
 * Generate a spec-compliant uuid-v4
 * adapted from: https://gist.github.com/bentranter/ed524091170137a72c1d54d641493c1f
 *
 * @returns {string} 36-character lowercase UUID (8-4-4-4-12 hex groups)
 */
function uuid() {
  const bytes = crypto.getRandomValues(new Uint8Array(16))
  // version 4 in the high nibble of byte 6
  bytes[6] = (bytes[6] & 0x0f) | 0x40
  // variant bits 10xx xxxx in byte 8
  bytes[8] = (bytes[8] & 0x3f) | 0x80
  // Every byte must render as exactly two hex digits, or the groups shrink.
  const hex = Array.from(bytes, byte => byte.toString(16).padStart(2, '0'))
  return [
    hex.slice(0, 4),
    hex.slice(4, 6),
    hex.slice(6, 8),
    hex.slice(8, 10),
    hex.slice(10)
  ]
    .map(group => group.join(''))
    .join('-')
}
/**
 * Grabs the anonymousId and expiration time from the cookies in the request header
 *
 * Adapted from: https://developers.cloudflare.com/workers/templates/pages/cookie_extract/
 *
 * Names and values are URI-decoded, mirroring the encoding applied when the
 * cookies are created. Values are split at the first `=` only, so values
 * that themselves contain `=` are kept intact.
 *
 * @param {Request} request incoming Request
 * @param {string} name of the edge-side cookie
 * @returns {{anonymousId: (string|null), expires: (Date|null)}} `expires` is
 *   null when the `<name>_set` cookie is absent or not a parseable date
 */
function getCookieData(request, name) {
  let anonymousId = null
  let expires = null

  const cookieString = request.headers.get('Cookie')
  if (!cookieString) return { anonymousId, expires }

  // Malformed percent-escapes must not take the whole request down.
  const decode = text => {
    try {
      return decodeURIComponent(text)
    } catch (e) {
      return text
    }
  }

  for (const pair of cookieString.split(';')) {
    const separator = pair.indexOf('=')
    if (separator === -1) continue
    const cookieName = decode(pair.slice(0, separator).trim())
    const value = decode(pair.slice(separator + 1).trim())
    if (cookieName === name) {
      anonymousId = value
    } else if (cookieName === `${name}_set`) {
      const parsed = new Date(value)
      expires = Number.isNaN(parsed.getTime()) ? null : parsed
    }
  }
  return { anonymousId, expires }
}
/**
 * Ship the error with some helpful request context as JSON to the specified endpoint
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * Reporting is best-effort: any 2xx answer counts as delivered, and a failed
 * delivery is written to the console instead of throwing.
 *
 * @param {String} endpoint URL that accepts a JSON POST
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Promise<void>}
 */
async function log(endpoint, err, request) {
  const body = JSON.stringify(errToJson(err, request))
  try {
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body
    })
    if (res.ok) {
      return
    }
    // We couldn't send to error endpoint, try to log the response at least.
    // The body is read as text first because it is not guaranteed to be JSON.
    const text = await res.text()
    let details
    try {
      const parsed = JSON.parse(text)
      details =
        parsed && typeof parsed === 'object' ? parsed : { body: text }
    } catch (parseErr) {
      details = { body: text }
    }
    console.error({ httpStatus: res.status, ...details }) // eslint-disable-line no-console
  } catch (sendErr) {
    // Network-level failure: nothing more can be done than noting it.
    console.error({ message: 'Failed to reach error endpoint', error: sendErr }) // eslint-disable-line no-console
  }
}
/**
 * Encode the parsed and formatted error as a JSON-serialisable object
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Object} report with message, exception (type, value, stack
 *   frames), any extra own properties of the error, and request context
 */
function errToJson(err, request) {
  const errType = err.name || (err.constructor || {}).name
  const frames = parse(err)
  const extraKeys = Object.keys(err).filter(
    key => !['name', 'message', 'stack'].includes(key)
  )

  let requestContext
  if (request && request.url) {
    // Headers objects stringify to `{}`; copy them into a plain object.
    const headers = {}
    const rawHeaders = request.headers
    if (rawHeaders && typeof rawHeaders[Symbol.iterator] === 'function') {
      for (const [headerName, headerValue] of rawHeaders) {
        headers[headerName] = headerValue
      }
    }
    // Request has no `query` property; derive the query string from the URL.
    let queryString
    try {
      queryString = new URL(request.url).search.replace(/^\?/, '')
    } catch (urlErr) {
      queryString = undefined
    }
    requestContext = {
      method: request.method,
      url: request.url,
      query_string: queryString,
      headers,
      // A body stream cannot be serialised here; only plain strings are kept.
      data: typeof request.body === 'string' ? request.body : undefined
    }
  }

  return {
    message: errType + ': ' + (err.message || '<no message>'),
    exception: {
      values: [
        {
          type: errType,
          value: err.message,
          stacktrace: frames.length ? { frames: frames.reverse() } : undefined
        }
      ]
    },
    extra: extraKeys.length
      ? {
          [errType]: extraKeys.reduce(
            (obj, key) => ({ ...obj, [key]: err[key] }),
            {}
          )
        }
      : undefined,
    platform: 'worker',
    timestamp: Date.now() / 1000,
    request: requestContext
  }
}
/**
 * Turn an error's stack trace into a list of frame descriptions.
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * The first stack line (the error message) is skipped. Separator lines made
 * of dashes become `{ filename: <line> }`; lines that do not look like a
 * frame are dropped.
 *
 * @param {Error} err the error
 * @returns {Array<Object>} frames in stack order
 */
function parse(err) {
  const frames = []
  const stackLines = (err.stack || '').split('\n').slice(1)

  for (const line of stackLines) {
    if (line.match(/^\s*[-]{4,}$/)) {
      frames.push({ filename: line })
      continue
    }

    // From https://github.com/felixge/node-stack-trace/blob/1ec9ba43eece124526c273c917104b4226898932/lib/stack-trace.js#L42
    const lineMatch = line.match(
      /at (?:(.+)\s+\()?(?:(.+?):(\d+)(?::(\d+))?|([^)]+))\)?/
    )
    if (!lineMatch) continue

    const [, fnName, fileName, lineText, colText, location] = lineMatch
    frames.push({
      function: fnName || undefined,
      filename: fileName || undefined,
      lineno: +lineText || undefined,
      colno: +colText || undefined,
      in_app: location !== 'native' || undefined
    })
  }

  return frames
}
@mmcspiritt
Copy link

Come back!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment