Skip to content

Instantly share code, notes, and snippets.

@crazy4groovy
Last active November 14, 2023 22:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save crazy4groovy/02c34adba3c25c54bf8bbccf9d4c431f to your computer and use it in GitHub Desktop.
Save crazy4groovy/02c34adba3c25c54bf8bbccf9d4c431f to your computer and use it in GitHub Desktop.
Scrape Midjourney "recent showcase" images into a folder, once per hour (JavaScript, Deno)
import { Timeout, TimeoutError } from "https://deno.land/x/timeout/mod.ts"
/** Returns a promise that settles (with no value) once `ms` milliseconds have elapsed. */
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Builds a throttled job runner on top of caller-supplied concurrency hooks.
 *
 * @param hooks.isBusy  Returns truthy while no further job may start.
 * @param hooks.lock    Called immediately before a job starts.
 * @param hooks.unlock  Called once a job has settled (fulfilled or rejected).
 * @param hooks.waitMs  Returns how long to sleep before polling `isBusy` again.
 * @param hooks.size    `size(n)` adjusts the job counter by `n`; `size()` reads it.
 * @returns An async function `throttler(cb, ...args)` that resolves with the
 *          result of `cb(...args)`. It also exposes `throttler.size()` (jobs
 *          queued or running) and `throttler.isBusy()`.
 */
function newThrottler({ isBusy, lock, unlock, waitMs, size }) {
  async function throttler(cb, ...args) {
    // Count the job as soon as it is submitted, i.e. while it is still queued.
    size(1);
    // Yield once so that a burst of synchronous submissions is all registered
    // before any of them starts competing for a slot.
    await Promise.resolve();
    while (isBusy()) {
      await delay(waitMs()); // sleep, then poll again
    }
    // No `await` between the final isBusy() check and lock(), so two jobs
    // cannot both claim the same free slot.
    lock();
    try {
      return await cb.call(this, ...args);
    } finally {
      // Release the slot even when the job throws; otherwise every failure
      // would permanently shrink the available concurrency.
      unlock();
      size(-1);
    }
  }
  throttler.size = () => size(); // read-only view of the counter
  throttler.isBusy = isBusy;
  return throttler;
}
/**
 * Creates a throttler that lets at most `threads` jobs run at the same time.
 * Waiting jobs re-check for a free slot every 1000-2000 ms (randomised).
 */
const throttler = (threads: number) => {
  let tracked = 0; // jobs submitted and not yet finished
  let running = 0; // jobs currently holding a slot
  return newThrottler({
    isBusy: () => running >= threads,
    lock: () => (running += 1),
    unlock: () => (running -= 1),
    waitMs: () => 1000 + 1000 * Math.random(),
    // A truthy `n` adjusts the counter; no argument (or 0) just reads it.
    size: (n) => (n ? (tracked += n) : tracked),
  });
};
/**
 * Creates a `downloadFile` wrapper that runs at most `threads` downloads
 * concurrently.
 *
 * @param threads Maximum number of simultaneous downloads.
 * @returns A function taking the same arguments as `downloadFile` and
 *          resolving with its result. The throttler's own properties
 *          (`size`, `isBusy`) are copied onto it.
 */
export const createDownloadThrottled = (threads: number) => {
  const thr = throttler(threads);
  function dl(...args) {
    // Hand the promise back so callers can await the download and read its
    // outcome; previously it was dropped and `dl` always returned undefined.
    return thr(downloadFile, ...args);
  }
  // Copies the throttler's own enumerable properties onto `dl`.
  return Object.assign(dl, thr);
};
// Example:
//   const dl = createDownloadThrottled(5);
//   dl('https://a.com/1.jpg', '~/imgs/1.jpg', myHeaders);
//   console.log(dl.size());   // number of downloads queued or running
//   console.log(dl.isBusy()); // true while all 5 slots are in use
/**
 * Downloads `source` and writes the response body to `destination`.
 *
 * @param source      URL to fetch.
 * @param destination Path of the file to create or overwrite.
 * @param headers     Request headers. When given as a plain object it may also
 *                    carry a `timeoutMs` entry (default 9000); that entry
 *                    configures the timeout and is not sent to the server.
 *                    The caller's object is never modified.
 * @param maxRetries  How many times to retry after a non-timeout failure
 *                    before giving up.
 * @returns `true` once the file has been written; `false` after a timeout or
 *          when every retry has failed. Errors are logged, never thrown.
 */
export async function downloadFile(
  source: string,
  destination: string,
  headers: any = {},
  maxRetries = 5,
): Promise<boolean> {
  // Only plain objects can carry the `timeoutMs` pseudo-header; a Headers
  // instance or an array of pairs is passed to fetch untouched.
  const isPlainRecord =
    headers != null &&
    typeof headers === "object" &&
    !Array.isArray(headers) &&
    !(headers instanceof Headers);
  // Split on a copy so `timeoutMs` is neither sent as an HTTP header nor
  // deleted from the caller's object.
  const { timeoutMs = undefined, ...plainHeaders } = isPlainRecord ? headers : {};
  const requestHeaders = isPlainRecord ? plainHeaders : (headers ?? undefined);
  const timeout = timeoutMs || 9000;

  for (let attempt = 0; ; attempt++) {
    try {
      const req = fetch(source, { headers: requestHeaders });
      const response = await Timeout.race([req], timeout);
      // Do not store an HTTP error page under an image file name.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`);
      }
      const data = new Uint8Array(await response.arrayBuffer());
      // writeFile opens, writes and closes in one call, so no handle can leak
      // when the write fails part-way.
      await Deno.writeFile(destination, data);
      return true;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(`ERROR while dl'ing ${source}:`, message);
      if (err instanceof TimeoutError) {
        console.error(`ERROR Timed out; skipping: ${source}`);
        return false;
      }
      // Bound the retries: a permanent failure (bad path, dead host) would
      // otherwise loop forever and keep a throttler slot occupied.
      if (attempt >= maxRetries) {
        console.error(`ERROR Giving up after ${attempt + 1} attempts: ${source}`);
        return false;
      }
      console.error("---RETRYING...");
      await delay(1000);
    }
  }
}
// https://chriszarate.github.io/bookmarkleter/
async function saveImageToDisk(directoryHandle, imageUrl, filename) {
const fileHandle = await directoryHandle.getFileHandle(filename, { create: true })
const outStream = await fileHandle.createWritable()
const response = await fetch(imageUrl)
const blob = await response.blob()
await outStream.write(blob)
await outStream.close()
}
async function saveImagesToDisk(directoryHandle, imageUrls, filenames, root) {
try {
const subdirectoryHandle = await directoryHandle.getDirectoryHandle(root, { create: false })
const prs = imageUrls.map(
async (imageUrl, i) => saveImageToDisk(subdirectoryHandle, imageUrl, filenames[i], root))
await Promise.all(prs)
console.log(imageUrls.length + ' Images saved to disk successfully!')
} catch (error) {
console.error('Error saving images:', error)
}
}
async function crudJobs(directoryHandle, date, newJobs) {
let create = true
for await (const e of directoryHandle.entries()) {
create && (e[0] === date) && (create = false)
if (!create) break;
}
const subdirectoryHandle = await directoryHandle.getDirectoryHandle(date, { create })
const fileHandle = await subdirectoryHandle.getFileHandle(`___jobs.yaml`, { create })
if (newJobs) {
const outStream = await fileHandle.createWritable()
await outStream.write(newJobs)
await outStream.close()
return
}
return fileHandle
.getFile()
.then(f => f.text())
.then(txt => txt.split('\n').filter(Boolean))
}
let directoryHandle
async function downloadImages(rows, imageUrls, filenames) {
const today = new Date()
const yesterday = new Date()
yesterday.setDate(today.getDate() - 1)
console.log('TODAY:', today)
try {
directoryHandle = directoryHandle || await window.showDirectoryPicker()
const jobs = [
await crudJobs(directoryHandle, today.toJSON().split('T')[0]),
await crudJobs(directoryHandle, yesterday.toJSON().split('T')[0])
]
for (let i = 0; i < rows.length; i++) {
if (jobs.flat().includes(rows[i])) {
rows.splice(i, 1)
imageUrls.splice(i, 1)
filenames.splice(i, 1)
i--;
}
}
await saveImagesToDisk(directoryHandle, imageUrls, filenames, today.toJSON().split('T')[0])
const newJobs = [...jobs[0], ...rows].filter(Boolean).join('\n')
await crudJobs(directoryHandle, today.toJSON().split('T')[0], newJobs)
} catch (error) {
console.error('Error requesting file system permission:' + error.message)
}
}
// Matches the `"buildId":"…"` entry embedded in the showcase page's HTML;
// capture group 1 is the id itself.
const BUILD_REGEX = /"buildId":"([^"]+)"/
// The id is interpolated into the data URL:
// https://www.midjourney.com/_next/data/${buildId}/showcase/recent.json
async function main() {
if (window.location.href !== 'https://legacy.midjourney.com/showcase/recent/')
window.location.href = 'https://legacy.midjourney.com/showcase/recent/'
const html = await fetch('.').then(r => r.text()) // get latest
const id = BUILD_REGEX.exec(html)[1]
id || console.log('Not Found: buildId')
const r = await fetch(`https://legacy.midjourney.com/_next/data/${id}/showcase/recent.json`)
.then(r => r.json())
const jobRows = r.pageProps.jobs
.filter(j => j.event.seedImageURL)
.map(j => [j.username, j.reference_job_id, j.event])
const { rows, imageUrls, filenames } = jobRows.reduce((m, row) => {
const [username, id, event] = row
const filename = `${username.replace(/[^\w]/g, '-')}__${id}.png`
m.rows.push('- ' + JSON.stringify(row))
m.imageUrls.push(event.seedImageURL)
m.filenames.push(filename)
return m
}, { rows:[], imageUrls:[], filenames:[] })
// Example usage
// const imageUrls = [
// 'https://cdn.midjourney.com/34c81180-ae94-4f78-b293-0f65af104168/0_2.png'
// ]
await downloadImages(rows, imageUrls, filenames)
setTimeout(main, 60 * 60 * 1000)
}
// Start the first run right away; main() schedules its own follow-up runs via setTimeout.
main()
import { ensureDirSync, existsSync } from 'https://deno.land/std/fs/mod.ts';
import { createDownloadThrottled } from "./dl.deno.ts";
// Downloader limited to 4 simultaneous transfers.
const dl = createDownloadThrottled(4);
// Base pause between polls, in minutes.
const waitMin = 9;
// Folder that receives one sub-folder per day.
const rootFolder = '___raw-images';
// File names already downloaded (or found on disk).
const imgSet = new Set();
/** Pauses for `waitMin` minutes plus a random extra of up to one minute. */
const wait = () => {
  const minuteMs = 1000 * 60;
  const pauseMs = minuteMs * waitMin + minuteMs * Math.random();
  return new Promise((resolve) => setTimeout(resolve, pauseMs));
};
/**
 * Performs one polling pass: reads the showcase page, fetches the recent-jobs
 * JSON, and queues a download for every job image that is not yet on disk.
 *
 * @throws When the page or JSON cannot be fetched or parsed, or when the
 *         page contains no build id. The polling loop below logs the error
 *         and carries on.
 */
async function scrapeOnce() {
  const page = await fetch('https://legacy.midjourney.com/showcase/recent/');
  const html = await page.text();
  // exec() returns null when nothing matches, so a page without a build id
  // becomes an explicit error instead of a TypeError on `null[0]`.
  const id = /buildId":"([^"]+)/.exec(html)?.[1];
  if (!id) throw new Error('buildId not found in showcase page');

  const response = await fetch(`https://legacy.midjourney.com/_next/data/${id}/showcase/recent.json`);
  const data = await response.json();
  const jobs = (data?.pageProps?.jobs ?? [])
    .filter(j => j.event?.seedImageURL) // skip jobs without an event or image
    .map(j => ({
      username: j.username,
      id: j.reference_job_id,
      event: j.event,
      filename: `${j.username.replace(/[^\w]/g, '-')}__${j.reference_job_id}.png`,
    }));
  if (!jobs.length) return;

  // Look through the last 20 daily folders for images saved by earlier runs
  // (as .png or .jpg) so they are not downloaded again.
  for (let daysAgo = 0; daysAgo < 20; daysAgo++) {
    const day = new Date();
    day.setDate(day.getDate() - daysAgo);
    const folder = `${rootFolder}/${day.toJSON().split('T')[0]}`;
    for (const { filename } of jobs) {
      if (imgSet.has(filename)) continue; // already known; skip the disk checks
      if (
        existsSync(`${folder}/${filename}`) ||
        existsSync(`${folder}/${filename.replace(/\.png/, '.jpg')}`)
      ) {
        imgSet.add(filename);
      }
    }
  }

  const now = new Date().toJSON().split('T')[0];
  ensureDirSync(`${rootFolder}/${now}`);
  for (const { username, id, event, filename } of jobs) {
    if (imgSet.has(filename)) continue;
    imgSet.add(filename);
    const file = `${rootFolder}/${now}/${filename}`;
    // Not awaited: the throttled downloader works through its queue in the background.
    dl(event.seedImageURL, file);
    Deno.writeTextFileSync(
      `${rootFolder}/${now}/___jobs.yaml`,
      '\n- ' + JSON.stringify([username, id, event]),
      { create: true, append: true }
    );
    console.log('DL:', file);
  }
}

// Poll forever. A failed pass is logged and retried after the usual pause,
// so one bad response cannot terminate the scraper.
while (true) {
  console.log('TRY', new Date().toJSON());
  try {
    await scrapeOnce();
  } catch (err) {
    console.error('ERROR while scraping:', err instanceof Error ? err.message : err);
  }
  await wait();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment