albertodeago/index.js

## index.js
const puppeteer = require("puppeteer");
const fs = require("fs");

// First page the scraper opens.
const pageUrl = "https://imgflip.com/memetemplates";

// Viewport size applied to the browser page.
const viewport = { width: 1600, height: 1200 };

// Selectors for the template list; every one is scoped to the list container.
const MEME_LIST_SELECTOR = ".mt-boxes";
const MEME_BOX_SELECTOR = MEME_LIST_SELECTOR + " .mt-box";
const MEME_TITLE_SELECTOR = MEME_LIST_SELECTOR + " .mt-title";
const MEME_SRC_SELECTOR = MEME_LIST_SELECTOR + " .mt-img-wrap img";

// Selectors for the "next page" control and for its disabled variant.
const NEXT_PAGE_SELECTOR = ".pager .pager-next";
const NEXT_PAGE_SELECTOR_DISABLED = NEXT_PAGE_SELECTOR + ".pager-disabled";
/**
 * Pause for a fixed amount of time.
 *
 * Fixed delays are normally something to avoid, but this one is deliberate: it
 * spaces out the requests so that the scraper does not flood imgflip.
 * @param {number} time - delay handed to `setTimeout`, in milliseconds
 * @returns {Promise<void>} promise that resolves once the delay has elapsed
 */
const wait = (time) =>
  new Promise((done) => {
    setTimeout(done, time);
  });

/**
 * Collect every meme template listed on the page currently loaded in `page`.
 *
 * The extraction callback is executed through `page.evaluate`, so the selectors
 * are handed over as arguments instead of being captured from this module.
 * Boxes without a title element, without an image `src`, or whose `src` has no
 * file-name segment are skipped instead of aborting the whole extraction.
 * @param {import("puppeteer").Page} page - page showing the template list
 * @returns {Promise<Array<{title: string, imageId: string, imageUrl: string}>>}
 */
const getMemeList = async (page) => {
  return page.evaluate(
    (boxSelector, titleSelector, srcSelector) => {
      const memes = [];

      for (const $box of document.querySelectorAll(boxSelector)) {
        const $title = $box.querySelector(titleSelector);
        const $img = $box.querySelector(srcSelector);
        const src = $img ? $img.getAttribute("src") : null;
        if (!$title || !src) continue;

        // The image id is the file name: the last non-empty segment of the src.
        const segments = src.split("/").filter((segment) => segment !== "");
        if (segments.length === 0) continue;
        const imageId = segments[segments.length - 1];

        memes.push({
          // Every character that is not a letter or a digit becomes "_".
          title: $title.innerText.replace(/[^a-z0-9]/gi, "_").toLowerCase(),
          imageId,
          imageUrl: "https://i.imgflip.com/" + imageId,
        });
      }

      return memes;
    },
    MEME_BOX_SELECTOR,
    MEME_TITLE_SELECTOR,
    MEME_SRC_SELECTOR
  );
};

/**
 * Save the body of a puppeteer response to `./memes/<title>.jpg`.
 *
 * The output folder is created when it does not exist yet, so a first run on a
 * clean checkout does not fail with ENOENT. The file always gets a `.jpg`
 * extension and an existing file with the same title is overwritten.
 * @param {HTTPResponse} source - response whose `buffer()` holds the image bytes
 * @param {string} title - file name (without extension) to save the image under
 * @returns {Promise<void>} resolves once the file has been written
 */
const downloadImage = async function (source, title) {
  const outputDir = "./memes/";
  const buffer = await source.buffer();

  // `recursive: true` makes mkdir a no-op when the folder is already there.
  await fs.promises.mkdir(outputDir, { recursive: true });
  await fs.promises.writeFile(outputDir + title + ".jpg", buffer);
};

/**
 * Download every meme listed on the page currently loaded in `page`.
 *
 * The list is read once up front; then each image URL is opened in the same
 * page, saved unless the response looks like an error document, and the page
 * is navigated back so that it ends up on the list again.
 * @param {import("puppeteer").Page} page - page showing the template list
 * @returns {Promise<void>}
 */
const downloadMemeInPage = async function (page) {
  const pageMemeList = await getMemeList(page);

  for (const memeObj of pageMemeList) {
    const viewSource = await page.goto(memeObj.imageUrl);
    // Deliberate pause between requests to go easy on the remote server.
    await wait(1000);

    // `page.goto` may resolve with null instead of a response; there is
    // nothing to inspect or save in that case.
    if (viewSource !== null && viewSource !== undefined) {
      // An XML content type means an error page was served, not an image.
      const responseContentType = viewSource.headers()["content-type"];
      if (!responseContentType || !responseContentType.includes("xml")) {
        await downloadImage(viewSource, memeObj.title);
      }
    }

    // Return to the list before moving on to the next image.
    await page.goBack();
  }
};

/**
 * Entry point: open the template list and download the memes of every page.
 *
 * After each page the "next page" control is checked; the loop stops as soon
 * as its disabled variant is present. The browser is closed in a `finally`
 * block so that it does not stay open when any step throws.
 * @returns {Promise<void>}
 */
async function run() {
  const browser = await puppeteer.launch({
    headless: false,
  });

  try {
    const page = await browser.newPage();
    await page.setViewport(viewport);
    await page.goto(pageUrl);

    let lastPage = false;
    while (!lastPage) {
      await downloadMemeInPage(page);
      lastPage = await page.evaluate(
        (sel) => document.querySelector(sel) !== null,
        NEXT_PAGE_SELECTOR_DISABLED
      );

      if (!lastPage) {
        // Start waiting for the navigation before clicking, and only continue
        // once it finished; otherwise the next iteration races the page load.
        await Promise.all([
          page.waitForNavigation(),
          page.click(NEXT_PAGE_SELECTOR),
        ]);
      }
    }
  } finally {
    await browser.close();
  }
}

// Start the scraper; report a failure and exit with a non-zero code instead of
// leaving the returned promise without a rejection handler.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
	const puppeteer = require("puppeteer");
	const fs = require("fs");

	// First page the scraper opens.
	const pageUrl = "https://imgflip.com/memetemplates";

	// Viewport size applied to the browser page.
	const viewport = { width: 1600, height: 1200 };

	// Selectors for the template list; every one is scoped to the list container.
	const MEME_LIST_SELECTOR = ".mt-boxes";
	const MEME_BOX_SELECTOR = MEME_LIST_SELECTOR + " .mt-box";
	const MEME_TITLE_SELECTOR = MEME_LIST_SELECTOR + " .mt-title";
	const MEME_SRC_SELECTOR = MEME_LIST_SELECTOR + " .mt-img-wrap img";

	// Selectors for the "next page" control and for its disabled variant.
	const NEXT_PAGE_SELECTOR = ".pager .pager-next";
	const NEXT_PAGE_SELECTOR_DISABLED = NEXT_PAGE_SELECTOR + ".pager-disabled";
	/**
	 * Pause for a fixed amount of time.
	 *
	 * Fixed delays are normally something to avoid, but this one is deliberate: it
	 * spaces out the requests so that the scraper does not flood imgflip.
	 * @param {number} time - delay handed to `setTimeout`, in milliseconds
	 * @returns {Promise<void>} promise that resolves once the delay has elapsed
	 */
	const wait = (time) =>
	  new Promise((done) => {
	    setTimeout(done, time);
	  });

	/**
	 * Collect every meme template listed on the page currently loaded in `page`.
	 *
	 * The extraction callback is executed through `page.evaluate`, so the selectors
	 * are handed over as arguments instead of being captured from this module.
	 * Boxes without a title element, without an image `src`, or whose `src` has no
	 * file-name segment are skipped instead of aborting the whole extraction.
	 * @param {import("puppeteer").Page} page - page showing the template list
	 * @returns {Promise<Array<{title: string, imageId: string, imageUrl: string}>>}
	 */
	const getMemeList = async (page) => {
	  return page.evaluate(
	    (boxSelector, titleSelector, srcSelector) => {
	      const memes = [];

	      for (const $box of document.querySelectorAll(boxSelector)) {
	        const $title = $box.querySelector(titleSelector);
	        const $img = $box.querySelector(srcSelector);
	        const src = $img ? $img.getAttribute("src") : null;
	        if (!$title || !src) continue;

	        // The image id is the file name: the last non-empty segment of the src.
	        const segments = src.split("/").filter((segment) => segment !== "");
	        if (segments.length === 0) continue;
	        const imageId = segments[segments.length - 1];

	        memes.push({
	          // Every character that is not a letter or a digit becomes "_".
	          title: $title.innerText.replace(/[^a-z0-9]/gi, "_").toLowerCase(),
	          imageId,
	          imageUrl: "https://i.imgflip.com/" + imageId,
	        });
	      }

	      return memes;
	    },
	    MEME_BOX_SELECTOR,
	    MEME_TITLE_SELECTOR,
	    MEME_SRC_SELECTOR
	  );
	};

	/**
	 * Save the body of a puppeteer response to `./memes/<title>.jpg`.
	 *
	 * The output folder is created when it does not exist yet, so a first run on a
	 * clean checkout does not fail with ENOENT. The file always gets a `.jpg`
	 * extension and an existing file with the same title is overwritten.
	 * @param {HTTPResponse} source - response whose `buffer()` holds the image bytes
	 * @param {string} title - file name (without extension) to save the image under
	 * @returns {Promise<void>} resolves once the file has been written
	 */
	const downloadImage = async function (source, title) {
	  const outputDir = "./memes/";
	  const buffer = await source.buffer();

	  // `recursive: true` makes mkdir a no-op when the folder is already there.
	  await fs.promises.mkdir(outputDir, { recursive: true });
	  await fs.promises.writeFile(outputDir + title + ".jpg", buffer);
	};

	/**
	 * Download every meme listed on the page currently loaded in `page`.
	 *
	 * The list is read once up front; then each image URL is opened in the same
	 * page, saved unless the response looks like an error document, and the page
	 * is navigated back so that it ends up on the list again.
	 * @param {import("puppeteer").Page} page - page showing the template list
	 * @returns {Promise<void>}
	 */
	const downloadMemeInPage = async function (page) {
	  const pageMemeList = await getMemeList(page);

	  for (const memeObj of pageMemeList) {
	    const viewSource = await page.goto(memeObj.imageUrl);
	    // Deliberate pause between requests to go easy on the remote server.
	    await wait(1000);

	    // `page.goto` may resolve with null instead of a response; there is
	    // nothing to inspect or save in that case.
	    if (viewSource !== null && viewSource !== undefined) {
	      // An XML content type means an error page was served, not an image.
	      const responseContentType = viewSource.headers()["content-type"];
	      if (!responseContentType || !responseContentType.includes("xml")) {
	        await downloadImage(viewSource, memeObj.title);
	      }
	    }

	    // Return to the list before moving on to the next image.
	    await page.goBack();
	  }
	};

	/**
	 * Entry point: open the template list and download the memes of every page.
	 *
	 * After each page the "next page" control is checked; the loop stops as soon
	 * as its disabled variant is present. The browser is closed in a `finally`
	 * block so that it does not stay open when any step throws.
	 * @returns {Promise<void>}
	 */
	async function run() {
	  const browser = await puppeteer.launch({
	    headless: false,
	  });

	  try {
	    const page = await browser.newPage();
	    await page.setViewport(viewport);
	    await page.goto(pageUrl);

	    let lastPage = false;
	    while (!lastPage) {
	      await downloadMemeInPage(page);
	      lastPage = await page.evaluate(
	        (sel) => document.querySelector(sel) !== null,
	        NEXT_PAGE_SELECTOR_DISABLED
	      );

	      if (!lastPage) {
	        // Start waiting for the navigation before clicking, and only continue
	        // once it finished; otherwise the next iteration races the page load.
	        await Promise.all([
	          page.waitForNavigation(),
	          page.click(NEXT_PAGE_SELECTOR),
	        ]);
	      }
	    }
	  } finally {
	    await browser.close();
	  }
	}

	// Start the scraper; report a failure and exit with a non-zero code instead of
	// leaving the returned promise without a rejection handler.
	run().catch((error) => {
	  console.error(error);
	  process.exitCode = 1;
	});