Created
May 28, 2025 17:11
-
-
Save trycf/52081ebdcf5b4b6cbd0e74e5bd908b95 to your computer and use it in GitHub Desktop.
TryCF Gist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<cfscript> | |
// Sample HTML document used as input for the regex-based checks in this script.
// It contains two plain <img> tags and a table whose first cell holds a third
// <img> (alt="like") next to the text "reacted to your message:".
// The literal is single-quoted so the double quotes inside need no escaping.
htmlText = ' | |
<html> | |
<body> | |
<img src="image1.jpg" alt="First Image" width="100"> | |
<p>Some text here</p> | |
<img src="image2.png" alt="Second Image" height="200" class="img-responsive"> | |
<table style="border-spacing:0"> | |
<tbody style="font-family:-apple-system,Segoe UI,HelveticaNeue-Bold,Helvetica Neue Bold,Helvetica Neue,Helvetica,Arial,sans-serif"> | |
<tr> | |
<td><img alt="like" src="https://outlook-1.cdn.office.net/assets/reaction/heart.png" style="height:25px; width:25px"> | |
</td> | |
<td><span style="font-weight:bold; padding-left:3px">Phil Cording</span> <span>reacted to your message:</span> | |
</td> | |
</tr> | |
</tbody> | |
</table> | |
</body> | |
</html> | |
'; | |
// Parse the name=value attributes of a single HTML tag into a struct.
//
// tag     - the raw tag text, e.g. '<img src="a.png" alt="A picture">'
// returns - struct keyed by attribute name; each value is the attribute text
//           without its surrounding quotes. Attributes that have no "=value"
//           part are not included. If a name occurs twice the last one wins.
//
// Values may be double-quoted, single-quoted or unquoted. Quoted values keep
// their inner spaces (alt="First Image" yields "First Image").
function parseAttributes(tag) {
    // Group 1: attribute name (letters, digits, "_", ":", ".", "-").
    // Group 2: double-quoted value, group 3: single-quoted value,
    // group 4: unquoted value (runs until whitespace, a quote or ">").
    var attrPattern = '([A-Za-z0-9_:.-]+)\s*=\s*(?:"([^"]*)"|''([^'']*)''|([^\s"''>]+))';
    var attrStruct = {};
    var tagLength = len(tag);
    var searchFrom = 1;

    // Walk the tag left to right; each reFind call resumes right after the
    // previous match, so every attribute is scanned exactly once.
    while (searchFrom LTE tagLength) {
        var found = reFind(attrPattern, tag, searchFrom, true);

        // pos[1] is the start of the whole match; 0 means no further attribute.
        if (found.pos[1] EQ 0) {
            break;
        }

        // pos[2]/len[2] describe capture group 1, the attribute name.
        var key = mid(tag, found.pos[2], found.len[2]);

        // Only one of the three value groups takes part in a match. Take the
        // first non-empty one; an empty value (alt="") stays "".
        var val = "";
        for (var groupIdx = 3; groupIdx LTE arrayLen(found.pos); groupIdx++) {
            if (found.pos[groupIdx] GT 0 AND found.len[groupIdx] GT 0) {
                val = mid(tag, found.pos[groupIdx], found.len[groupIdx]);
                break;
            }
        }

        attrStruct[key] = val;

        // The name group needs at least one character, so this always advances.
        searchFrom = found.pos[1] + found.len[1];
    }

    return attrStruct;
}
// Extract every <img ...> tag from an HTML string and parse its attributes.
//
// html    - HTML text to scan
// returns - array with one struct per <img> tag, in document order, each
//           struct being the result of parseAttributes() for that tag.
//           An empty array is returned when no <img> tag is found.
function extractImgTags(html) {
    // \b keeps names that merely start with "img" from matching;
    // [^>]* consumes everything up to the tag's closing ">".
    var imgTagPattern = "<img\b[^>]*>";

    // HTML tag names are case-insensitive, so <IMG ...> has to match as well.
    var imgTags = reMatchNoCase(imgTagPattern, html);
    var result = [];

    // "var" keeps the loop variable local instead of leaking it out of the function.
    for (var tag in imgTags) {
        arrayAppend(result, parseAttributes(tag));
    }

    return result;
}
// Parse every <img> tag of the sample document into an array of attribute structs
imgArray = extractImgTags(htmlText);

// Show the parsed attributes
writeDump(var=imgArray, label="Extracted <img> tags", format="html");

// Report whether any <img> tag mentions one of the reaction keywords
regexPattern = "<img\b[^>]*(like|heart|thumbs\s*up)[^>]*>";
writeOutput(
    reFindNoCase(regexPattern, htmlText) > 0
        ? "Contains reaction image"
        : "No reaction image"
);

// Position of the first reaction keyword anywhere in the document (0 = none)
writeDump(reFindNoCase("surprised|laugh|sad|celebrate|heart|like|thumbs", htmlText));

// Position of the first "reacted" / "reaction" occurrence (0 = none)
writeDump(reFindNoCase("reacted|reaction", htmlText));

// Same keyword search, this time returning the pos/len match structure
writeDump(reFindNoCase("surprised|laugh|sad|celebrate|heart|like|thumbs", htmlText, 1, true));

// Position of the "reacted with/to your message" phrase (0 = none)
writeOutput("<br/> " & reFindNoCase("reacted (with|to) your message", htmlText));
// Dirty, broken HTML sample
dirtyHtml = "
<html>
<body>
<img src='one.jpg' alt='Image One'>
<img src='two.jpg' alt='Image Two'>
</body>
</html>
";

// Step 1: Create a TagSoup parser (the TagSoup jar must be on the classpath)
parser = createObject("java", "org.ccil.cowan.tagsoup.Parser");

// Step 2: Create an InputSource from the HTML string.
// The bytes are produced as UTF-8, so declare that encoding explicitly
// instead of leaving the parser to pick one.
byteStream = createObject("java", "java.io.ByteArrayInputStream").init(dirtyHtml.getBytes("UTF-8"));
inputSource = createObject("java", "org.xml.sax.InputSource").init(byteStream);
inputSource.setEncoding("UTF-8");

// Step 3: Transform dirty HTML to DOM using SAX and an identity Transformer
saxSource = createObject("java", "javax.xml.transform.sax.SAXSource").init(parser, inputSource);
transformerFactory = createObject("java", "javax.xml.transform.TransformerFactory").newInstance();
transformer = transformerFactory.newTransformer();
domResult = createObject("java", "javax.xml.transform.dom.DOMResult").init();
transformer.transform(saxSource, domResult);

// Step 4: Get the DOM document node
xmlDoc = domResult.getNode();

// Step 5: Search for <img> nodes.
// TagSoup reports elements in the XHTML namespace by default, and a plain
// "//img" only selects elements that have no namespace. Matching on
// local-name() finds the nodes whether or not they are namespaced.
imgNodes = xmlSearch(xmlDoc, "//*[local-name()='img']");

// Step 6: Build array of <img> HTML strings
imgTags = [];
for (imgNode in imgNodes) {
    tagString = "<img";
    attrs = imgNode.getAttributes();
    for (i = 0; i < attrs.getLength(); i++) {
        attr = attrs.item(i);
        // Escape the value so a quote or "&" inside it cannot break out of
        // the single-quoted attribute being rebuilt.
        tagString &= " " & attr.getNodeName() & "='" & xmlFormat(attr.getNodeValue()) & "'";
    }
    tagString &= ">";
    arrayAppend(imgTags, tagString);
}

// Step 7: Dump the array
writeDump(var=imgTags, label="Extracted <img> Tags");
</cfscript> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment