Created
December 22, 2023 11:19
-
-
Save dacog/0d501dea6033995450726594538ff10a to your computer and use it in GitHub Desktop.
Telegram API-Compatible HTML Sanitizer in JavaScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * sanitizeHtmlForTelegram
 *
 * I am using ActivePieces.com to publish a Telegram message using the API, but since the last API update, support for
 * HTML tags and Markdown is limited. To make the automation easier (by using the same string in many elements) I needed
 * a way to sanitize the string to use it for Telegram.
 * Here is such a function.
 *
 * This function sanitizes a given HTML string to ensure compatibility with the Telegram API's supported HTML tags.
 * It removes all unsupported HTML tags and retains only those that are allowed by Telegram, such as <b>, <i>, <a>, etc.
 * The function uses a combination of regular expressions and DOM manipulation (via jsdom in non-browser environments)
 * to parse and clean the HTML content.
 * The sanitized string is suitable for use in Telegram API requests where styled HTML text is required.
 *
 * @param {string} inputString - The HTML string to be sanitized.
 * @returns {string} The sanitized HTML string, retaining only Telegram API supported tags.
 */
const { JSDOM } = require("jsdom"); | |
/**
 * Reduces an HTML string to the subset of markup kept by this sanitizer for
 * Telegram's HTML parse mode.
 *
 * The input is parsed with jsdom and the resulting tree is re-serialized:
 * - text is emitted with `&`, `<` and `>` escaped, so decoded entities can
 *   never turn back into markup;
 * - allow-listed formatting tags are emitted without any attributes;
 * - `<a>` keeps only its `href` (escaped); an anchor without `href` is unwrapped;
 * - every other element is unwrapped, i.e. only its children are emitted.
 *
 * Note: block elements such as `<p>` are unwrapped without inserting line breaks.
 *
 * @param {string} inputString - The HTML string to be sanitized.
 * @returns {string} The sanitized HTML string.
 */
function cleanString(inputString) {
  const dom = new JSDOM(inputString);
  const { document, Node } = dom.window;
  const { TEXT_NODE, ELEMENT_NODE } = Node;

  // A Set (rather than a plain object lookup) so that tag names like
  // "constructor" cannot match inherited Object.prototype properties.
  const allowedTags = new Set([
    'b',
    'strong',
    'i',
    'em',
    'code',
    's',
    'strike',
    'del',
    'u',
    'pre',
    // Add other allowed tags as needed
  ]);

  // `&` must be replaced first so the entities produced below are not double-escaped.
  const escapeText = (text) =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  const escapeAttribute = (value) => escapeText(value).replace(/"/g, '&quot;');

  function cleanChildren(node) {
    return Array.from(node.childNodes).map(cleanNode).join('');
  }

  function cleanNode(node) {
    if (node.nodeType === TEXT_NODE) {
      // nodeValue holds the *decoded* text, so it has to be re-escaped.
      return escapeText(node.nodeValue);
    }
    if (node.nodeType !== ELEMENT_NODE) {
      // Comments and other node types contribute only their children (if any).
      return cleanChildren(node);
    }

    const tagName = node.tagName.toLowerCase();
    const inner = cleanChildren(node);

    if (tagName === 'a') {
      const href = node.getAttribute('href');
      return href ? `<a href="${escapeAttribute(href)}">${inner}</a>` : inner;
    }
    if (allowedTags.has(tagName)) {
      return `<${tagName}>${inner}</${tagName}>`;
    }
    // Unsupported element: drop the tag itself, keep its sanitized content.
    return inner;
  }

  return cleanChildren(document.body);
}
/**
 * Entry point: sanitizes `inputs.mastodon_content` for use in a Telegram message.
 *
 * @param {{ mastodon_content?: unknown }} [inputs] - Step inputs; `mastodon_content`
 *   is expected to be an HTML string.
 * @returns {Promise<string|false>} The sanitized string, or `false` when the input
 *   is missing / not a string, or when nothing is left after sanitizing.
 */
export const code = async (inputs) => {
  // Guard: reject missing inputs and non-string content up front.
  if (typeof inputs?.mastodon_content !== 'string') {
    console.error('inputs is undefined or mastodon_content is not a string');
    return false;
  }

  const cleanedString = cleanString(inputs.mastodon_content);
  if (!cleanedString) {
    // An empty result is reported as `false` rather than as an empty string.
    console.log('Cleaned string is empty');
    return false;
  }
  return cleanedString;
};
// Example usage: runs the sanitizer on a sample Mastodon post and prints the result.
// Top-level `await` requires this file to be executed as an ES module.
const inputString = `<p>📢 Hey everyone! I just posted a new article on my blog...`;
const result = await code({ mastodon_content: inputString });
console.log(result);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment