dacog/sanitizeHtmlForTelegram.js

## sanitizeHtmlForTelegram.js
/**
 * sanitizeHtmlForTelegram
 *
 * I am using ActivePieces.com to publish a Telegram message using the API, but since the last API update the support
 * for HTML tags and Markdown is limited. To make the automation easier (by using the same string in many elements) I
 * needed a way to sanitize the string for use with Telegram.
 * Here is such a function.
 *
 * This function sanitizes a given HTML string to ensure compatibility with the Telegram API's supported HTML tags.
 * It strips every tag that is not on its allow-list and keeps only simple formatting tags such as <b>, <i>, <code>
 * and <pre>; the text inside removed tags is preserved, and attributes on kept tags are dropped.
 * The input is parsed into a DOM (via jsdom) and the resulting tree is walked node by node to rebuild the string.
 * The sanitized string is suitable for use in Telegram API requests where styled HTML text is required.
 *
 * @param {string} inputString - The HTML string to be sanitized.
 * @returns {string} The sanitized HTML string, retaining only Telegram API supported tags.
 */

const { JSDOM } = require("jsdom");

/**
 * Reduce an HTML string to the small set of formatting tags this module forwards to Telegram.
 *
 * The input is parsed with jsdom and the children of <body> are walked recursively:
 *   - text nodes are emitted with `&`, `<` and `>` re-escaped as entities;
 *   - elements on the allow-list are re-emitted as bare tags (every attribute is dropped);
 *   - any other node is replaced by the sanitized output of its children.
 *
 * @param {string} inputString - The HTML string to be sanitized.
 * @returns {string} The sanitized HTML string, containing only allow-listed tags and escaped text.
 */
function cleanString(inputString) {
    const dom = new JSDOM(inputString);
    const { document, Node } = dom.window;
    const { TEXT_NODE, ELEMENT_NODE } = Node;

    // A Set rather than a plain object: an object lookup also "finds" inherited keys such as
    // `constructor`, which would let a <constructor> element slip through the allow-list.
    const allowedTags = new Set([
        'b',
        'strong',
        'i',
        'em',
        'code',
        's',
        'strike',
        'del',
        'u',
        'pre',
        // Add other allowed tags as needed
    ]);

    // nodeValue is already entity-decoded by the parser, so the characters that are special in
    // HTML must be escaped again; otherwise "&lt;b&gt;" in the input would come out as a live tag.
    const escapeText = (text) =>
        text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

    function cleanNode(node) {
        if (node.nodeType === TEXT_NODE) {
            return escapeText(node.nodeValue);
        }

        const inner = Array.from(node.childNodes).map((child) => cleanNode(child)).join('');
        if (node.nodeType !== ELEMENT_NODE) {
            return inner;
        }

        const tag = node.tagName.toLowerCase();
        return allowedTags.has(tag) ? `<${tag}>${inner}</${tag}>` : inner;
    }

    return Array.from(document.body.childNodes).map((child) => cleanNode(child)).join('');
}

/**
 * Entry point: sanitize `inputs.mastodon_content` for Telegram.
 *
 * @param {{ mastodon_content: string }} inputs - Object carrying the HTML string to sanitize.
 * @returns {Promise<string|false>} The sanitized string, or `false` when the input is missing,
 *   is not a string, or nothing is left after sanitizing (each failure case is logged).
 */
export const code = async (inputs) => {
    // Guard: anything without a string `mastodon_content` is rejected up front.
    if (typeof inputs?.mastodon_content !== 'string') {
        console.error('inputs is undefined or mastodon_content is not a string');
        return false;
    }

    const sanitized = cleanString(inputs.mastodon_content);
    if (sanitized) {
        return sanitized;
    }

    console.log('Cleaned string is empty');
    return false;
};

// Example usage: run the entry point on a sample post and print what it returns
// (the sanitized string, or false when the input is rejected or ends up empty).
const inputString = `<p>📢 Hey everyone! I just posted a new article on my blog...`;
const result = await code({ mastodon_content: inputString });
console.log(result);
	/**
	* sanitizeHtmlForTelegram
	*
	* I am using ActivePieces.com to publish a Telegram message using the API, but since the last API update the support
	* for HTML tags and Markdown is limited. To make the automation easier (by using the same string in many elements) I
	* needed a way to sanitize the string for use with Telegram.
	* Here is such a function.
	*
	* This function sanitizes a given HTML string to ensure compatibility with the Telegram API's supported HTML tags.
	* It strips every tag that is not on its allow-list and keeps only simple formatting tags such as <b>, <i>, <code>
	* and <pre>; the text inside removed tags is preserved, and attributes on kept tags are dropped.
	* The input is parsed into a DOM (via jsdom) and the resulting tree is walked node by node to rebuild the string.
	* The sanitized string is suitable for use in Telegram API requests where styled HTML text is required.
	*
	* @param {string} inputString - The HTML string to be sanitized.
	* @returns {string} The sanitized HTML string, retaining only Telegram API supported tags.
	*/

	const { JSDOM } = require("jsdom");

	/**
	 * Reduce an HTML string to the small set of formatting tags this module forwards to Telegram.
	 *
	 * The input is parsed with jsdom and the children of <body> are walked recursively:
	 *   - text nodes are emitted with `&`, `<` and `>` re-escaped as entities;
	 *   - elements on the allow-list are re-emitted as bare tags (every attribute is dropped);
	 *   - any other node is replaced by the sanitized output of its children.
	 *
	 * @param {string} inputString - The HTML string to be sanitized.
	 * @returns {string} The sanitized HTML string, containing only allow-listed tags and escaped text.
	 */
	function cleanString(inputString) {
		const dom = new JSDOM(inputString);
		const { document, Node } = dom.window;
		const { TEXT_NODE, ELEMENT_NODE } = Node;

		// A Set rather than a plain object: an object lookup also "finds" inherited keys such as
		// `constructor`, which would let a <constructor> element slip through the allow-list.
		const allowedTags = new Set([
			'b',
			'strong',
			'i',
			'em',
			'code',
			's',
			'strike',
			'del',
			'u',
			'pre',
			// Add other allowed tags as needed
		]);

		// nodeValue is already entity-decoded by the parser, so the characters that are special in
		// HTML must be escaped again; otherwise "&lt;b&gt;" in the input would come out as a live tag.
		const escapeText = (text) =>
			text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

		function cleanNode(node) {
			if (node.nodeType === TEXT_NODE) {
				return escapeText(node.nodeValue);
			}

			const inner = Array.from(node.childNodes).map((child) => cleanNode(child)).join('');
			if (node.nodeType !== ELEMENT_NODE) {
				return inner;
			}

			const tag = node.tagName.toLowerCase();
			return allowedTags.has(tag) ? `<${tag}>${inner}</${tag}>` : inner;
		}

		return Array.from(document.body.childNodes).map((child) => cleanNode(child)).join('');
	}

	/**
	 * Entry point: sanitize `inputs.mastodon_content` for Telegram.
	 *
	 * @param {{ mastodon_content: string }} inputs - Object carrying the HTML string to sanitize.
	 * @returns {Promise<string|false>} The sanitized string, or `false` when the input is missing,
	 *   is not a string, or nothing is left after sanitizing (each failure case is logged).
	 */
	export const code = async (inputs) => {
		// Guard: anything without a string `mastodon_content` is rejected up front.
		if (typeof inputs?.mastodon_content !== 'string') {
			console.error('inputs is undefined or mastodon_content is not a string');
			return false;
		}

		const sanitized = cleanString(inputs.mastodon_content);
		if (sanitized) {
			return sanitized;
		}

		console.log('Cleaned string is empty');
		return false;
	};

	// Example usage: run the entry point on a sample post and print what it returns
	// (the sanitized string, or false when the input is rejected or ends up empty).
	const inputString = `<p>📢 Hey everyone! I just posted a new article on my blog...`;
	const result = await code({ mastodon_content: inputString });
	console.log(result);