Skip to content

Instantly share code, notes, and snippets.

@Darep
Created June 15, 2024 22:17
Show Gist options
  • Save Darep/46c0aa7f65753291f7140e4c09f8405c to your computer and use it in GitHub Desktop.
Save Darep/46c0aa7f65753291f7140e4c09f8405c to your computer and use it in GitHub Desktop.
Scrape Flockler site articles and/or images to local files.
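/**
* Scrape Flockler site articles and/or images to local files.
*
* Usage:
* $ bun fl-scrape.ts --get-articles --get-images
*
* --get-articles saves all articles of the site to flockler-export.json.
* --get-images downloads the images referenced by the articles into
* images/thumbs and images/files. When used without --get-articles, the
* articles are read from flockler-export.json instead of being fetched.
*/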
// Numeric ID of the Flockler site to scrape; edit this before running.
const SITE_ID = 1;

// Articles shared between the export step and the image step.
// Only the fields this script actually reads are typed here.
let articles: { cover_url: string; body: string }[] = [];
/**
 * Fetch every article of the configured site and save them to disk.
 *
 * Starts at the article listing for `SITE_ID` and keeps following the
 * `pagination.older` link of each response until there is none left. The
 * collected articles replace the module-level `articles` array and are
 * written, pretty-printed, to `flockler-export.json`.
 *
 * @throws Error when a page request answers with a non-2xx HTTP status.
 */
async function getArticles(): Promise<void> {
  const collected: typeof articles = [];
  let pageUrl: string | null | undefined =
    `https://api.flockler.com/v1/sites/${SITE_ID}/articles`;

  while (pageUrl) {
    console.log(pageUrl);
    const response = await fetch(pageUrl);
    if (!response.ok) {
      // Fail loudly instead of trying to parse an error page as article data.
      throw new Error(
        `error: request failed with HTTP ${response.status} for ${pageUrl}`
      );
    }

    const data = await response.json();
    // Tolerate a page without an `articles` array or a `pagination` object.
    const pageArticles: typeof articles = Array.isArray(data?.articles)
      ? data.articles
      : [];
    collected.push(...pageArticles);
    console.log(`Fetched ${pageArticles.length} articles`);
    pageUrl = data?.pagination?.older;
  }

  // Publish the result only once every page has been fetched successfully.
  articles = collected;
  await Bun.write('flockler-export.json', JSON.stringify(articles, null, 2));
}
/**
 * Strip the thumbnail-variant suffixes from a URL or file name.
 *
 * Removes the first `_<s|c|l|m><W>x<H>` suffix, a following `_m<W>x<H>`
 * suffix, the first `_q<N>` suffix and the first `_noupscale` / `_contain`
 * markers. Presumably what remains addresses the unmodified upload; that is
 * an assumption about Flockler's naming scheme.
 */
function stripThumbModifiers(value: string): string {
  return value
    .replace(/_(s|c|l|m)\d+x\d+/, '')
    .replace(/_m\d+x\d+/, '')
    .replace(/_q\d+/, '')
    .replace('_noupscale', '')
    .replace('_contain', '');
}

/**
 * Download one thumbnail plus the image it was derived from.
 *
 * The thumbnail is stored as `images/thumbs/<filename>` and the variant with
 * all thumbnail suffixes stripped as `images/files/<filename>`. URLs that do
 * not look like `https://flockler.com/thumbs/sites/<id>/<filename>` are
 * reported and skipped, as are downloads that answer with an HTTP error
 * status (so error pages never end up on disk as image files).
 *
 * @param url Thumbnail URL taken from an article.
 */
async function downloadImage(url: string): Promise<void> {
  const filename = url.match(
    /https:\/\/flockler\.com\/thumbs\/sites\/\d+\/([^\/"]+)/
  )?.[1];
  if (!filename) {
    console.error('error: no filename for url', url);
    return;
  }

  const origUrl = stripThumbModifiers(url);
  const origFilename = stripThumbModifiers(filename);

  console.log(`Downloading ${filename}`);

  const thumbResponse = await fetch(url);
  if (thumbResponse.ok) {
    const thumbPath = `images/thumbs/${filename}`;
    await Bun.write(thumbPath, thumbResponse);
    console.log(`Saved thumb to ${thumbPath}`);
  } else {
    console.error(`error: HTTP ${thumbResponse.status} for thumb`, url);
  }

  // The original is still attempted when the thumbnail failed.
  const origResponse = await fetch(origUrl);
  if (origResponse.ok) {
    const origPath = `images/files/${origFilename}`;
    await Bun.write(origPath, origResponse);
    console.log(`Saved original to ${origPath}`);
  } else {
    console.error(`error: HTTP ${origResponse.status} for original`, origUrl);
  }
}
/**
 * Download every Flockler-hosted image referenced by the articles.
 *
 * Collects each article's `cover_url` (when it points at this site's thumbs)
 * and every thumbnail URL found in the article body, then downloads each
 * distinct URL once. The returned promise settles only after all downloads
 * have finished; individual failures are logged and do not abort the rest.
 *
 * @param useFile When true, load the articles from `flockler-export.json`
 *   instead of using the ones already held in memory.
 */
async function getImages(useFile: boolean): Promise<void> {
  if (useFile) {
    const file = Bun.file('flockler-export.json');
    articles = JSON.parse(await file.text());
  }
  // An empty array is truthy, so the length has to be checked explicitly.
  if (!Array.isArray(articles) || articles.length === 0) {
    console.error('error: no articles!');
    return;
  }

  const thumbUrlPattern =
    /https:\/\/flockler\.com\/thumbs\/sites\/\d+\/([^\/"]+)/g;
  // A Set avoids fetching, and concurrently writing, the same file twice.
  const urls = new Set<string>();

  for (const article of articles) {
    if (
      article.cover_url &&
      article.cover_url.includes(`flockler.com/thumbs/sites/${SITE_ID}`)
    ) {
      urls.add(article.cover_url);
    }

    // Guard against articles whose body is missing or not a string.
    const body = typeof article.body === 'string' ? article.body : '';
    for (const match of body.matchAll(thumbUrlPattern)) {
      urls.add(match[0]);
    }
  }

  const pending = [...urls];
  const results = await Promise.allSettled(
    pending.map((imageUrl) => downloadImage(imageUrl))
  );
  results.forEach((result, index) => {
    if (result.status === 'rejected') {
      console.error('error: download failed for', pending[index], result.reason);
    }
  });
}
const args = Bun.argv;
const shouldGetArticles = args.includes('--get-articles');
const shouldGetImages = args.includes('--get-images');

/**
 * Run the steps selected on the command line, strictly one after the other.
 *
 * Each step is awaited so that "Done!" is printed only once the step's
 * promise has settled, and so that the image step sees the articles the
 * article step has just fetched. When articles are not fetched in this run,
 * the image step is told to read them from the export file instead.
 */
async function main(): Promise<void> {
  if (!shouldGetArticles && !shouldGetImages) {
    console.log('No action specified. Use --get-articles and --get-images');
    return;
  }

  if (shouldGetArticles) {
    console.log('Getting articles');
    await getArticles();
    console.log('Done!');
  }

  if (shouldGetImages) {
    console.log('Getting images');
    await getImages(!shouldGetArticles);
    console.log('Done!');
  }
}

main().catch((err: unknown) => {
  // Report the failure and signal it to the shell through the exit code.
  console.error('error:', err);
  process.exitCode = 1;
});

export {};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment