#!/usr/bin/env node
// @ts-check
'use strict';
// Execute with the working directory at the root of CommunityScrapers.
// Dependency: https://npmjs.com/package/yaml
const fs = require('fs');
const path = require('path');
// Resolve the `yaml` package from `./validator/node_modules` first,
// falling back to the default module search paths.
const _moduleSearchPaths = (mod) => ([
  path.resolve(process.cwd(), './validator/node_modules'),
  ...(require.resolve.paths(mod) || []),
]);
const _yamlModule = require.resolve('yaml', { paths: _moduleSearchPaths('yaml') });
const YAML = require(_yamlModule);
// -------------
// Configuration
// -------------
/**
 * Separate Markdown table columns using ` | ` instead of `|`?
 */
const addSpacesBetweenColumns = false;
/**
 * Array of prefixes to remove from all hosts.
 * @type {string[]}
 */
const unwantedPrefixes = [
  'en',
  'free',
  'new',
  'tour',
  'www',
];
/**
 * A list of sites that have non-standard URLs in the scrapers list.
 * For example, given the source URL `www.mypornsite.xxx/scenes/`:
 * Keys are the generated hosts to override, and can be either of
 * (note that some prefixes are always stripped from the hostname first; see `unwantedPrefixes` above):
 * 1. Hostname = `mypornsite.xxx`
 * 2. Hostname + Pathname = `mypornsite.xxx/scenes/`
 * Values can be either a string or a list of strings to override the host with.
 * @type {{ [site: string]: string|string[] }}
 */
const siteNameOverride = {
  'api.metadataapi.net': 'metadataapi.net (JSON API)',
  'enasianmusume.kin8tengoku.com': 'kin8tengoku.com',
  'metadataapi.net': 'metadataapi.net (URL)',
  'mgstage.com': 'www.mgstage.com',
  'newsensations.com/tour_ns/dvds': 'newsensations.com/tour_ns/',
  'newsensations.com/tour_ns/updates': 'newsensations.com/tour_ns/',
  'newsensations.com/tour_rs/': 'newsensations.com/tour_rs/',
  'purgatoryx.com': 'tour.purgatoryx.com',
  'trans500.com/tour/': 'trans500.com/tour/',
  'trans500.com/tour3/': 'trans500.com/tour3/',
  'trans500.com/tourespanol': 'trans500.com/tourespanol',
  'wicked.com/en/movie/': 'wicked.com (/movies)',
};
/**
 * Known hosts for script-based scrapers whose configs contain no usable URL,
 * keyed by `fileName|objectType`. An empty list means the scraper is omitted
 * from the generated table.
 * @type {{ [lookupKey: string]: string[] }}
 */
const scriptSites = {
  'AdultimeAPI.yml|scene': ['adultime.com'],
  'IAFD.yml|performer': ['iafd.com'],
  'JacquieEtMichelTV.yml|scene': ['jacquieetmicheltv.net'],
  'JavLibrary_python.yml|scene': [],
  'MindGeekAPI.yml|scene': [],
  'multiscrape.yml|performer': [],
  'performer-image-dir.yml|performer': [],
  'stash-sqlite.yml|performer': [],
  'torrent.yml|scene': [],
  'xbvrdb.yml|scene': [],
};
// -------------
/**
 * @typedef ScraperHost
 * @property {string} host
 * @property {boolean} [scene]
 * @property {boolean} [performer]
 * @property {boolean} [movie]
 * @property {boolean} [gallery]
 * @property {boolean} usesPython
 * @property {boolean} usesNode
 */
/**
 * @typedef Scraper
 * @property {string} fileName
 * @property {ScraperHost[]} hosts
 * @property {boolean} usesCDP
 */
const columnSep = addSpacesBetweenColumns ? ' | ' : '|';
const escapeRegex = (/** @type {string} */string) => string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
const prefixes = unwantedPrefixes.map(escapeRegex).join('|');
const prefixesPattern = new RegExp(String.raw`^(${prefixes})\.`);
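// Example: with the default prefixes above, this is /^(en|free|new|tour|www)\./,
// so a hostname like 'www.example.com' is reduced to 'example.com'.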
/**
 * Make valid hosts for the scrapers list from a partial URL string.
 * @param {string} url
 * @returns {string[]}
 */
const makeHosts = (url) => {
  const urlObj = new URL(url.replace(/^(?!https?:)/, 'http://'));
  const hostname = urlObj.hostname.replace(prefixesPattern, '');
  const overrideKeys = [
    hostname,
    hostname + urlObj.pathname,
  ];
  let result = undefined;
  let idx = 0;
  while (idx < overrideKeys.length && result === undefined) {
    result = siteNameOverride[overrideKeys[idx]];
    idx++;
  }
  if (result === undefined) {
    result = [hostname];
  }
  return result instanceof Array ? result : [result];
};
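// Illustrative examples (`mypornsite.xxx` is the hypothetical site from the
// `siteNameOverride` comment above):
//   makeHosts('www.mypornsite.xxx/scenes/')          -> ['mypornsite.xxx']
//   makeHosts('https://api.metadataapi.net/scenes/') -> ['metadataapi.net (JSON API)']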
// Matches variable names inside `{...}` placeholders, e.g. `url` in `{url}`.
const queryURLVariablePattern = /(?<=\{)([a-z]+)(?=\})/g;
/**
 * @param {string} [queryURL]
 * @param {Object.<string, { regex: string, with: string }[]>} [queryURLReplace]
 */
const parseQueryURL = (queryURL, queryURLReplace) => {
  if (!queryURL) {
    return [];
  }
  if (!queryURLReplace) {
    return [queryURL];
  }
  const urls = [];
  (queryURL.match(queryURLVariablePattern) || []).forEach((key) => {
    const keyPattern = new RegExp(String.raw`\{${key}\}`, 'g');
    if (!queryURLReplace[key]) return;
    queryURLReplace[key].forEach(({ regex, with: repl }) => {
      // Skip replacements whose regex is only the end-of-string anchor.
      if (regex.trim() === '$') return;
      urls.push(queryURL.replace(keyPattern, repl));
    });
  });
  return urls;
};
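// Example (illustrative, with a hypothetical config):
//   queryURL: 'https://example.com/{path}/search'
//   queryURLReplace: { path: [{ regex: '.*', with: 'scenes' }, { regex: '.*', with: 'movies' }] }
// returns ['https://example.com/scenes/search', 'https://example.com/movies/search'].
// Note that only the `with` value is substituted; the regex itself is not applied here.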
/**
 * Zip two arrays of equal length.
 * @param {any[]} a
 * @param {any[]} b
 */
const zip = (a, b) => a.map((k, i) => [k, b[i]]);
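// Example: zip(['a', 'b'], [1, 2]) -> [['a', 1], ['b', 2]]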
const listPath = path.resolve(process.cwd(), './SCRAPERS-LIST.md');
/**
 * @returns {string[]}
 */
const getScrapers = () => {
  const scrapersDir = path.resolve(process.cwd(), './scrapers');
  return fs.readdirSync(scrapersDir).reduce(
    (acc, fname) => (fname.endsWith('.yml') ? acc.concat(path.join(scrapersDir, fname)) : acc),
    []
  );
};
let markdownHeader = '';
// Values here are only a fallback; the script reads the current columns from the existing list.
/** @type {{align: string, title: string}[]} */
let tableColumns = [
  {align: '' , title: 'Supported Site'},
  {align: '' , title: 'Scraper'},
  {align: '^', title: 'S'},
  {align: '^', title: 'G'},
  {align: '^', title: 'M'},
  {align: '^', title: 'P'},
  {align: '^', title: 'Needs'},
  {align: '^', title: 'Contents'},
];
const knownNeeds = [
  'cdp',
  // 'python3',
  // 'node',
];
/**
 * Parse the existing list to preserve the `Needs` and `Contents` columns,
 * which cannot be derived from the scraper config files.
 * @returns {[Object.<string, string[]>, Object.<string, string>]}
 */
const _getCurrentData = () => {
  /** @type {Object.<string, string[]>} */
  const needsMap = {};
  /** @type {Object.<string, string>} */
  const contentMap = {};
  const md = fs.readFileSync(listPath, 'utf8');
  const lines = md.split(/\r?\n/g);
  // The table header is the first line with exactly N-1 pipes for N columns.
  const tableHeader = lines.findIndex((line) => (line.match(/\|/g) || []).length === (tableColumns.length - 1));
  markdownHeader = lines.slice(0, tableHeader).join('\n');
  /**
   * Joined column titles and headers.
   * @type {[string, string][]}
   */
  // @ts-ignore
  const tableColumnHeaders = zip(...lines.slice(tableHeader, tableHeader + 2).map((h) => h.split('|')));
  /** @type {{align: string, title: string}[]} */
  const currentTableColumns = tableColumnHeaders.map(([title, header]) => {
    title = title.trim();
    header = header.trim();
    const left = header.slice(0, 1);
    const right = header.slice(-1);
    const align = (left === ':' && right === ':') ? '^' : (right === ':') ? '>' : (left === ':') ? '<' : '';
    return { align, title };
  });
  if (currentTableColumns.length > 0) {
    tableColumns = currentTableColumns;
  }
  lines.slice(tableHeader + 2).forEach((line) => {
    const [
      host,
      , // fileName
      , // scenes
      , // gallery
      , // movies
      , // performers
      needs,
      contents,
    ] = line.split('|');
    if (typeof contents === 'string' && contents.trim() !== '-') {
      contentMap[host.trim()] = contents.trim();
    }
    if (typeof needs === 'string' && needs.trim() !== '-') {
      needsMap[host.trim()] = needs
        .trim()
        .split(', ')
        .filter((n) => !knownNeeds.includes(n.toLowerCase()));
    }
  });
  return [needsMap, contentMap];
};
const [hostNeedsMap, hostContentMap] = _getCurrentData();
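// Example (illustrative row; cell values are hypothetical):
//   `example.com|Example.yml|:x:|:x:|:x:|:heavy_check_mark:|CDP, python3|Videos`
// yields hostNeedsMap['example.com'] = ['python3'] (CDP is in `knownNeeds`,
// since it is regenerated from the YAML below) and
// hostContentMap['example.com'] = 'Videos'.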
/**
 * Build the Markdown alignment row cell for a column.
 * @param {typeof tableColumns[number]} column
 * @returns {string}
 */
const makeColumnHeader = ({ align, title }) => {
  if (!align) {
    return '-'.repeat(Math.max(1, title.length));
  }
  const left = (align === '^' || align === '<') ? ':' : '';
  const right = (align === '^' || align === '>') ? ':' : '';
  const len = title.length - (left ? 1 : 0) - (right ? 1 : 0);
  return left + '-'.repeat(Math.max(1, len)) + right;
};
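// Examples:
//   makeColumnHeader({ align: '^', title: 'Needs' })   -> ':---:'
//   makeColumnHeader({ align: '',  title: 'Scraper' }) -> '-------'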
/**
 * @param {string[][]} accumulator
 * @param {typeof tableColumns[number]} column
 * @param {number} idx
 * @returns {string[][]}
 */
const tableHeaderReducer = ([titles, headers], column, idx) => {
  const columnHeader = makeColumnHeader(column);
  const len = column.title.length;
  const max = columnHeader.length;
  if (len >= max) {
    titles.push(column.title);
  } else {
    // Center the title within the width of the alignment cell.
    titles.push(
      column.title
        .padStart(len + Math.floor((max - len) / 2), ' ')
        .padEnd(max, ' ')
    );
  }
  return [titles, headers.concat(columnHeader)];
};
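// Example: { align: '^', title: 'S' } produces the cells ' S ' and ':-:',
// centering the one-character title over the three-character alignment cell.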
/**
 * Icons to use for supported/unsupported cells.
 * @param {boolean} val
 */
const getIcon = (val) => val ? ':heavy_check_mark:' : ':x:';
/**
 * Produce one Markdown table row per host of a scraper.
 * @param {Scraper} scraper
 */
const makeTableEntry = (scraper) => {
  const { fileName, hosts, usesCDP } = scraper;
  return hosts.map((hostObj) => {
    const { host } = hostObj;
    const scene = getIcon(!!hostObj['scene']);
    const gallery = getIcon(!!hostObj['gallery']);
    const movie = getIcon(!!hostObj['movie']);
    const performer = getIcon(!!hostObj['performer']);
    /** @type {string[]} */
    const needsArray = [].concat(hostNeedsMap[host] || []);
    // Anything handled here should be added to the `knownNeeds` array.
    (usesCDP) && needsArray.push('CDP');
    // (hostObj.usesPython) && needsArray.push('python3');
    // (hostObj.usesNode) && needsArray.push('node');
    const needs = (needsArray.length > 0) ? needsArray.join(', ') : '-';
    // Content is not available in the scraper config files,
    // so use the data from the current list.
    const content = hostContentMap[host] || '-';
    return [host, fileName, scene, gallery, movie, performer, needs, content].join(columnSep);
  });
};
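// Example (illustrative): a hypothetical scraper `Example.yml` supporting only
// scenes for `example.com`, with `useCDP` enabled and no existing needs/contents,
// yields the row:
//   `example.com|Example.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-`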
let results = [];
// Matches top-level config keys such as `sceneByURL` or `performerByFragment`.
const mappingPattern = /^([a-z]+)By(Fragment|Name|URL)$/;
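// Example: 'sceneByURL' captures ('scene', 'URL'); 'galleryByFragment' captures ('gallery', 'Fragment').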
const yamlLoadOptions = {
  prettyErrors: true,
  version: '1.2',
};
for (const file of getScrapers()) {
  const relPath = path.relative(process.cwd(), file);
  let data;
  try {
    const contents = fs.readFileSync(file, 'utf8');
    data = YAML.parse(contents, yamlLoadOptions);
  } catch (error) {
    console.error(`\x1b[31mError parsing\x1b[0m ${relPath}:`);
    error.stack = null;
    console.error(error);
    break;
  }
  /** @type {Scraper} */
  const scraper = {
    fileName: path.basename(file),
    hosts: [],
    usesCDP: Boolean(data.driver && data.driver.useCDP),
  };
  Object.entries(data).forEach(([key, value]) => {
    const match = mappingPattern.exec(key);
    if (!match) {
      return;
    }
    /** @type {'scene'|'performer'|'movie'|'gallery'} */
    const objectType = (match[1]);
    /** @type {'URL'|'Fragment'|'Name'} */
    const scraperMethod = (match[2]);
    /**
     * @typedef ConfigSlim
     * @property {'scrapeXPath'|'scrapeJson'|'script'|'stash'} action
     * @property {string | string[]} [url]
     * @property {string} [queryURL]
     * @property {Object.<string, { regex: string, with: string }[]>} [queryURLReplace]
     * @property {string[]} [script]
     */
    /** @type {ConfigSlim[]} */
    (value instanceof Array ? value : [value]).forEach((config) => {
      const { action, url: urls, queryURL, queryURLReplace } = config;
      /** @type {Object.<string, boolean>} */
      const uses = {};
      if (action === 'script' && config.script) {
        uses.python = ['python', 'python3'].includes(config.script[0]);
        uses.node = config.script[0] === 'node';
      }
      const queryURLs = parseQueryURL(queryURL, queryURLReplace);
      if (urls || queryURLs.length) {
        for (const url of (urls || queryURLs)) {
          const hosts = makeHosts(url);
          for (const curHost of hosts) {
            // Skip hosts built from regex backreferences like `$1`.
            if (/^\$\d/.test(curHost))
              continue;
            let hostItem = scraper.hosts.find(h => h.host === curHost);
            if (!hostItem) {
              const newLength = scraper.hosts.push(
                /** @type {ScraperHost} */
                ({ host: curHost })
              );
              hostItem = scraper.hosts[newLength - 1];
            }
            hostItem[objectType] = true;
            hostItem.usesPython = Boolean(uses.python);
            hostItem.usesNode = Boolean(uses.node);
          }
        }
      } else if (action === 'script') {
        /** @type {string[]} */
        let hosts;
        const lookupKey = `${scraper.fileName}|${objectType}`;
        if (scriptSites[lookupKey]) {
          hosts = scriptSites[lookupKey];
        } else {
          // Fall back to a host already discovered for this object type.
          const hostObj = scraper.hosts.find((h) => h[objectType]);
          if (!hostObj) {
            console.log(`Skipped (unknown URL): ${scraper.fileName} ${key}`);
            return;
          }
          hosts = [hostObj.host];
        }
        for (const host of hosts) {
          let hostItem = scraper.hosts.find(h => h.host === host);
          if (!hostItem) {
            const newLength = scraper.hosts.push(
              /** @type {ScraperHost} */
              ({ host })
            );
            hostItem = scraper.hosts[newLength - 1];
          }
          hostItem[objectType] = true;
          hostItem.usesPython = Boolean(uses.python);
          hostItem.usesNode = Boolean(uses.node);
        }
      } else {
        console.log(`TODO: ${scraper.fileName} ${key}`);
      }
    });
  });
  results = results.concat(makeTableEntry(scraper));
}
const markdown = `${markdownHeader}
${
  tableColumns
    .reduce(tableHeaderReducer, [[], []])
    .map((arr) => arr.join(columnSep))
    .join('\n')
}
${results.sort((a, b) => a.toLowerCase().localeCompare(b.toLowerCase())).join('\n')}
`;
fs.writeFileSync(listPath, markdown, 'utf8');