Skip to content

Instantly share code, notes, and snippets.

@GregBrimble
Last active March 4, 2021 10:45
Show Gist options
  • Save GregBrimble/cf110434f6e48829125c3eb2ca70ce3d to your computer and use it in GitHub Desktop.
Save GregBrimble/cf110434f6e48829125c3eb2ca70ce3d to your computer and use it in GitHub Desktop.
A Cloudflare Workers proxy for Archive.org
// Register the Worker's fetch listener. Each incoming request is passed to
// handleRequest through handleError, and the resulting promise is used as the
// response for the event.
addEventListener('fetch', (fetchEvent) => {
  const pendingResponse = handleError(handleRequest, fetchEvent.request)
  fetchEvent.respondWith(pendingResponse)
})
/**
 * Error raised when a request pathname cannot be interpreted.
 *
 * The offending pathname is embedded in the error message.
 */
class InvalidURLPathnameError extends Error {
  /**
   * @param {string} pathname - The pathname that could not be parsed.
   */
  constructor(pathname) {
    const message = `Invalid URL pathname: ${pathname}`
    super(message)
    this.name = "InvalidURLPathnameError"
  }
}
// Matches file names of the shape `<prefix>.<word>.bottle[.<digits>].tar.gz`
// (case-insensitive) and captures `<prefix>` as group 1.
const BOTTLE_REGEX = /^(.*)\.\w+\.bottle(?:\.\d+)?\.tar\.gz$/i

/**
 * Translates a request URL of the form `/<org>/<repo>/<bottle file>` into the
 * Archive.org item name and the file path inside that item.
 *
 * - `linuxbrew`: item is the org, file is `bottles/<bottle file>`.
 * - `brewsci`: item is the org, file is `bottles/<bottle file>` for the `core`
 *   repo and `bottles-<repo>/<bottle file>` otherwise.
 * - any other org: item is `<org>-<repo>-<package name>`, where `<repo>` is
 *   `core` for the `bottles` segment, or the part after the first `-` of the
 *   segment (e.g. `bottles-cask` -> `cask`); file is the bottle file itself.
 *
 * @param {URL} url - Parsed request URL; only `pathname` is read.
 * @returns {{ item: string, file: string }} Archive.org item and file path.
 * @throws {InvalidURLPathnameError} When the pathname does not have the
 *   expected shape, the file is not a bottle archive, or (for the default
 *   org handling) the repo or package name cannot be derived.
 */
const extractPartsFromURL = (url) => {
  try {
    const [, org, repoSegment, fileName] = url.pathname.split('/')
    const match = BOTTLE_REGEX.exec(fileName)
    if (match === null) {
      throw new Error('File name is not a bottle archive')
    }

    switch (org) {
      case 'linuxbrew': {
        return {
          item: org,
          file: `bottles/${fileName}`
        }
      }
      case 'brewsci': {
        const repo = repoSegment === 'core' ? 'bottles' : `bottles-${repoSegment}`
        return {
          item: org,
          file: `${repo}/${fileName}`
        }
      }
      default: {
        const packageNameVersion = match[1]
        // Everything before the last '-' is treated as the package name.
        const packageName = packageNameVersion.substring(0, packageNameVersion.lastIndexOf('-'))
        const repo = repoSegment === 'bottles' ? 'core' : repoSegment.split('-')[1]
        // Refuse to build item names such as `org-undefined-pkg` or `org-core-`.
        if (!repo || !packageName) {
          throw new Error('Could not derive the repo or package name')
        }
        return {
          item: `${org}-${repo}-${packageName}`,
          file: fileName
        }
      }
    }
  } catch (error) {
    // Any failure above (including unexpected shapes) is reported uniformly.
    throw new InvalidURLPathnameError(url.pathname)
  }
}
/**
 * Builds the follow-up request for a response that carries a `Location`
 * header.
 *
 * A relative `Location` is resolved against the URL of the request that
 * produced the response (https://tools.ietf.org/html/rfc7231#section-7.1.2);
 * an absolute `Location` is used as-is.
 *
 * @param {Response} response - Response to inspect for a `Location` header.
 * @param {Request|URL|string} request - The request (or its URL) that produced
 *   `response`; used as the base for relative redirect targets.
 * @returns {Request|undefined} A new request for the redirect target, or
 *   `undefined` when the response has no `Location` header.
 */
const flattenRedirectResponseToRequest = (response, request) => {
  const location = response.headers.get('location')
  if (location === null) {
    return undefined
  }
  // A Request object is not a valid URL base (it stringifies to
  // "[object Request]"), so resolve against its `url` instead.
  const baseURL = request?.url ?? request
  // The URL constructor ignores the base when `location` is already absolute.
  const redirectURL = new URL(location, baseURL).toString()
  return new Request(redirectURL)
}
/**
 * Looks up metadata for a file by issuing a HEAD request to
 * `https://<item>.s3.us.archive.org/<file>`, following one redirect manually
 * if the response carries a `Location` header.
 *
 * @param {{ item: string, file: string }} parts - Item name and file path.
 * @returns {Promise<{ digest?: string }>} `{ digest: "MD5=<etag>" }` built
 *   from the response's `ETag` header with double quotes removed, or an empty
 *   object when the response is not OK or has no `ETag` header.
 */
const getFileMetadata = async ({ item, file }) => {
  const request = new Request(`https://${item}.s3.us.archive.org/${file}`)
  let response = await fetch(request, { method: 'HEAD', cf: { cacheEverything: true } })

  const redirectRequest = flattenRedirectResponseToRequest(response, request)
  if (redirectRequest) {
    response = await fetch(redirectRequest, {
      method: 'HEAD',
      cf: { cacheEverything: true, cacheKey: `metadata:${item}:${file}` }
    })
  }

  if (!response.ok) {
    return {}
  }

  // A successful response without an ETag must not crash the whole request;
  // treat it as "no metadata available".
  const etag = response.headers.get('ETag')
  if (etag === null) {
    return {}
  }

  return {
    digest: `MD5=${etag.replaceAll('"', '')}`
    // Also available: upload time, file size, meta description etc.
  }
}
/**
 * Runs a request handler and converts any thrown error into an HTTP response.
 *
 * @param {(request: Request) => Promise<Response>|Response} handler - The
 *   handler to invoke with `request`.
 * @param {Request} request - The incoming request, passed through unchanged.
 * @returns {Promise<Response>} The handler's response; a 400 response when the
 *   handler throws an InvalidURLPathnameError; a 500 response for any other
 *   error.
 */
const handleError = async (handler, request) => {
  try {
    return await handler(request)
  } catch (error) {
    if (error instanceof InvalidURLPathnameError) {
      return new Response("Could not parse URL pathname", { status: 400 })
    }
    // Log the details, but do not expose internal error messages to clients
    // and do not report an unexpected failure with a success status.
    console.error(error)
    return new Response("Internal Error", { status: 500 })
  }
}
/**
 * Main handler function.
 *
 * Maps the incoming request's pathname to an Archive.org download URL, fetches
 * it (following one redirect manually when the response carries a `Location`
 * header) and relays the body to the client.
 *
 * @param {Request} request - The incoming request.
 * @returns {Promise<Response>} On success, the upstream body, status and
 *   headers plus an `X-ARCHIVE-URL` header and any metadata returned by
 *   getFileMetadata; "Not Found" (404) when upstream answers 404; otherwise
 *   "Bad Gateway" (502).
 * @throws {InvalidURLPathnameError} Propagated from extractPartsFromURL when
 *   the pathname cannot be parsed.
 */
const handleRequest = async (request) => {
  const url = new URL(request.url)
  const { item, file } = extractPartsFromURL(url)
  const transformedRequest = new Request(`https://archive.org/download/${item}/${file}`)

  // Makes a GET request to the transformed URL (on Archive.org) and caches the response
  // We can easily customize the caching rules: https://developers.cloudflare.com/workers/examples/cache-using-fetch
  let response = await fetch(transformedRequest, { cf: { cacheEverything: true } })

  // Inspect the *response* for a redirect, and resolve a relative target
  // against the Archive.org request that produced it.
  const redirectRequest = flattenRedirectResponseToRequest(response, transformedRequest)
  if (redirectRequest) {
    response = await fetch(redirectRequest, { cf: { cacheEverything: true, cacheKey: `${item}:${file}` } })
  }

  if (response.ok) {
    const metadata = await getFileMetadata({ item, file })
    return new Response(response.body, {
      status: response.status,
      statusText: response.statusText,
      headers: {
        ...(Object.fromEntries(response.headers.entries())),
        "X-ARCHIVE-URL": transformedRequest.url,
        ...metadata
      }
    })
  }

  if (response.status === 404) {
    return new Response("Not Found", { status: 404 })
  }
  return new Response("Bad Gateway", { status: 502 })
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment