Skip to content

Instantly share code, notes, and snippets.

@bjoerge
Created March 27, 2020 09:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bjoerge/5922173cf964a496a1a5cb51a906c7c2 to your computer and use it in GitHub Desktop.
Save bjoerge/5922173cf964a496a1a5cb51a906c7c2 to your computer and use it in GitHub Desktop.
Find missing assets in dataset
import got from "got"
import createClient from "@sanity/client"
import {asyncScheduler, concat, defer, EMPTY, from, Observable} from "rxjs"
import {
  map,
  mergeMap,
  retry,
  scan,
  takeLast,
  tap,
  throttleTime,
  timeout,
} from "rxjs/operators"
import {uniq} from "./uniq"
// Number of image assets requested per query; also the distance between the
// start offsets of consecutive batches.
const BATCH_SIZE = 1000
// Offset (in creation order) of the first asset to check.
// Presumably this can be raised to resume an interrupted run — confirm.
const START_AT = 0
// Placeholders: replace with the id of the project and the name of the dataset to scan.
const PROJECT_ID = '<projectId>'
const DATASET = '<dataset>'
// Client used to list the URLs of the image assets in the dataset.
// NOTE(review): `useCdn: true` presumably serves queries from Sanity's cached
// API CDN, so very recent asset changes might not be reflected — confirm this
// is acceptable for an audit.
const client = createClient({
projectId: PROJECT_ID,
dataset: DATASET,
useCdn: true,
})
/**
 * Probes an image URL with an HTTP HEAD request and reports whether the image
 * is there.
 *
 * NOTE: despite the name, the emitted boolean is `true` when the image EXISTS
 * (HTTP 200) and `false` when it is missing (HTTP 404).
 *
 * Any other status code, or an attempt that takes longer than `timeoutMs`, is
 * treated as a transient failure: the request is issued again, up to `retries`
 * additional times, and the observable errors if every attempt fails.
 *
 * NOTE(review): the RxJS `timeout` only stops waiting for the response; it
 * presumably does not abort the underlying `got` request — confirm whether
 * that matters at the concurrency used by callers.
 *
 * @param url - URL of the image asset to probe.
 * @param options - Optional tuning. The defaults (10 s timeout, 4 retries)
 *   reproduce the values that used to be hard-coded.
 * @returns An observable that performs the request on subscription and emits
 *   one boolean: `true` if the image exists, `false` if it is missing.
 */
const checkMissingImage = (
  url: string,
  {timeoutMs = 10000, retries = 4}: {timeoutMs?: number; retries?: number} = {}
): Observable<boolean> =>
  // `defer` makes every (re)subscription issue a fresh HEAD request, which is
  // what lets `retry` below actually repeat the call.
  defer(() => got.head(url, {throwHttpErrors: false})).pipe(
    // Placed before `retry` so the limit applies to each attempt separately.
    timeout(timeoutMs),
    map((response) => {
      const {statusCode} = response
      if (statusCode === 200) {
        return true
      }
      if (statusCode === 404) {
        return false
      }
      // Neither "exists" nor "missing": fail this attempt so it is retried.
      throw new Error(`Unexpected status code ${statusCode} from HEAD ${url}`)
    }),
    retry(retries)
  )
/**
 * Progress snapshot emitted while the image URLs of a batch are being checked.
 */
interface BatchProgress {
/** The `[start, end]` slice offsets that were used to query the batch this snapshot belongs to. */
batch: [number, number]
/** Number of URLs in the batch whose check has not completed yet. */
remainingInBatch: number
/**
 * URLs whose check reported the image as missing. As emitted by `getNextBatch`
 * this covers the current batch only; `run$` merges it across batches.
 */
missingUrls: string[]
}
/** Maximum number of URL checks kept in flight at the same time within a batch. */
const MAX_CONCURRENT_CHECKS = 500

/**
 * Checks the image assets in the slice `[start...end]` (ordered by
 * `_createdAt`, end-exclusive in GROQ) and then continues, recursively, with
 * the slice that starts at `end`.
 *
 * For each batch one `BatchProgress` is emitted per completed URL check, with
 * `remainingInBatch` counting down and `missingUrls` accumulating the URLs of
 * that batch reported as missing. The stream completes once a query returns
 * no URLs.
 *
 * @param start - Offset of the first asset of the batch.
 * @param end - Offset just past the last asset of the batch.
 * @returns An observable of progress snapshots for this and all later batches.
 */
const getNextBatch = (start: number, end: number): Observable<BatchProgress> =>
  defer(() =>
    client.observable.fetch(
      `*[_type == "sanity.imageAsset"] | order(_createdAt asc) [${start}...${end}].url`
    )
  ).pipe(
    mergeMap((imageUrls: string[]) => {
      // An empty page means we have run past the last asset: stop recursing.
      if (imageUrls.length === 0) {
        return EMPTY
      }

      // Typed explicitly so `batch` is checked as a tuple rather than inferred
      // as `number[]`.
      const initialProgress: BatchProgress = {
        batch: [start, end],
        remainingInBatch: imageUrls.length,
        missingUrls: [],
      }

      const batchProgress$ = from(imageUrls).pipe(
        mergeMap(
          // Map each URL to itself when missing, or to `null` when it exists.
          (url) =>
            checkMissingImage(url).pipe(map((exists) => (exists ? null : url))),
          MAX_CONCURRENT_CHECKS
        ),
        scan(
          (acc: BatchProgress, missingUrl: string | null): BatchProgress => ({
            ...acc,
            missingUrls: missingUrl
              ? [...acc.missingUrls, missingUrl]
              : acc.missingUrls,
            remainingInBatch: acc.remainingInBatch - 1,
          }),
          initialProgress
        )
      )

      // `concat` subscribes to the next batch only after this one completes;
      // building it here is cheap because the query itself sits behind `defer`.
      return concat(batchProgress$, getNextBatch(end, end + BATCH_SIZE))
    })
  )
/**
 * Walks all batches starting at `START_AT`, merges the missing URLs across
 * batches, prints a throttled progress display, and finally emits the last
 * progress state (nothing is emitted when no image assets were found).
 */
const run$ = getNextBatch(START_AT, START_AT + BATCH_SIZE).pipe(
  // No seed is given, so the first snapshot passes through unchanged and the
  // accumulator only runs from the second snapshot on. Per-batch snapshots
  // repeat the URLs already reported for that batch, hence the `uniq` (imported
  // from ./uniq; presumably removes duplicates) when merging.
  scan(
    (acc: BatchProgress, nextState: BatchProgress): BatchProgress => ({
      ...nextState,
      missingUrls: uniq(acc.missingUrls.concat(nextState.missingUrls)),
    })
  ),
  // Redraw at most every 500 ms; `trailing: true` keeps the latest state of
  // each window instead of dropping it.
  throttleTime(500, asyncScheduler, {leading: true, trailing: true}),
  tap((state: BatchProgress) => {
    console.clear()
    console.log(
      "Current batch: %d…%d (%d remaining in batch)",
      state.batch[0],
      state.batch[1],
      state.remainingInBatch
    )
    console.log(
      "Found %d missing image(s) so far\n%s",
      state.missingUrls.length,
      state.missingUrls.join("\n")
    )
  }),
  takeLast(1)
)

// Last state emitted by `run$`; stays null when the dataset has no image assets.
let finalState: BatchProgress | null = null

run$.subscribe({
  next: (state: BatchProgress) => {
    finalState = state
  },
  error: (err: unknown) => {
    // Report the failure explicitly instead of leaving it as an uncaught
    // exception, and make the process exit with a non-zero status.
    console.error("Aborted while checking image assets:", err)
    process.exitCode = 1
  },
  complete: () => {
    // Printed on completion so that an empty dataset still gets a summary.
    const missingUrls = finalState ? finalState.missingUrls : []
    console.clear()
    console.log(
      "Done! Found %d missing images\n%s",
      missingUrls.length,
      missingUrls.join("\n")
    )
  },
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment