Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save trycf/52081ebdcf5b4b6cbd0e74e5bd908b95 to your computer and use it in GitHub Desktop.
Save trycf/52081ebdcf5b4b6cbd0e74e5bd908b95 to your computer and use it in GitHub Desktop.
TryCF Gist
<cfscript>
// Sample HTML fixture scanned by extractImgTags() and the regex checks further
// down. It holds two plain <img> tags plus a table whose cell contains an
// <img alt="like" ... heart.png> followed by "reacted to your message:" text.
// Everything between the quotes is runtime data; it also contains attribute
// values with spaces (alt, style).
htmlText = '
<html>
<body>
<img src="image1.jpg" alt="First Image" width="100">
<p>Some text here</p>
<img src="image2.png" alt="Second Image" height="200" class="img-responsive">
<table style="border-spacing:0">
<tbody style="font-family:-apple-system,Segoe UI,HelveticaNeue-Bold,Helvetica Neue Bold,Helvetica Neue,Helvetica,Arial,sans-serif">
<tr>
<td><img alt="like" src="https://outlook-1.cdn.office.net/assets/reaction/heart.png" style="height:25px; width:25px">
</td>
<td><span style="font-weight:bold; padding-left:3px">Phil Cording</span> <span>reacted to your message:</span>
</td>
</tr>
</tbody>
</table>
</body>
</html>
';
// Parse the attributes of a single HTML tag into a struct.
//
// @tag     Raw tag text, e.g. '<img src="a.png" alt="First Image">'.
// @return  Struct mapping attribute name -> attribute value, with the
//          surrounding quotes removed.
//
// Values may be double-quoted, single-quoted or unquoted. A quoted value is
// captured up to its matching closing quote, so spaces inside it are kept
// (alt="First Image" yields "First Image", not "First"). Attribute names may
// contain hyphens and colons (data-id, xlink:href). Attributes written
// without "=value" are not matched and therefore not returned.
function parseAttributes(tag) {
    // Group 1: attribute name.
    // Group 2: the value, still wrapped in its quotes when it was quoted.
    var attrPattern = '([\w:-]+)\s*=\s*("[^"]*"|''[^'']*''|[^\s"''>]+)';
    var attrStruct = {};
    var matchArray = reMatch(attrPattern, tag);

    // "var" keeps the loop variable local instead of leaking it to the caller's scope.
    for (var match in matchArray) {
        var parts = reFind(attrPattern, match, 1, true);

        // Guard against a missing sub-expression: mid() cannot start at position 0.
        if (arrayLen(parts.pos) LT 3 OR parts.pos[2] EQ 0 OR parts.pos[3] EQ 0) {
            continue;
        }

        var key = mid(match, parts.pos[2], parts.len[2]);
        var val = mid(match, parts.pos[3], parts.len[3]);

        // Strip one pair of matching quotes; an empty quoted value becomes "".
        var firstChar = left(val, 1);
        if (len(val) GTE 2 AND (firstChar EQ '"' OR firstChar EQ "'") AND right(val, 1) EQ firstChar) {
            val = (len(val) GT 2) ? mid(val, 2, len(val) - 2) : "";
        }

        attrStruct[key] = val;
    }

    return attrStruct;
}
// Extract every <img> tag from an HTML string.
//
// @html    HTML text to scan.
// @return  Array with one struct per <img> tag found, each struct being the
//          result of parseAttributes() for that tag. Empty array when the
//          text contains no <img> tag.
//
// Tag names are matched case-insensitively, so <IMG ...> is found as well.
// The tag is taken to end at the first ">", so an attribute value that
// itself contains ">" will cut the tag short.
function extractImgTags(html) {
    var imgTagPattern = "<img\b[^>]*>";
    var imgTags = reMatchNoCase(imgTagPattern, html);
    var result = [];

    // "var" keeps the loop variable local instead of leaking it to the caller's scope.
    for (var tag in imgTags) {
        arrayAppend(result, parseAttributes(tag));
    }

    return result;
}
// Collect the attributes of every <img> tag in the sample markup.
imgArray = extractImgTags(htmlText);

// Show what was extracted.
writeDump(var=imgArray, label="Extracted <img> tags", format="html");

// Report whether any <img> tag mentions one of the reaction keywords.
regexPattern = "<img\b[^>]*(like|heart|thumbs\s*up)[^>]*>";
writeOutput(
    (reFindNoCase(regexPattern, htmlText) GT 0)
        ? "Contains reaction image"
        : "No reaction image"
);

// Index of the first reaction keyword anywhere in the markup.
writeDump(var=reFindNoCase("surprised|laugh|sad|celebrate|heart|like|thumbs", htmlText));

// Index of the first "reacted" / "reaction" word.
writeDump(var=reFindNoCase("reacted|reaction", htmlText));

// Same keyword search, this time asking for the sub-expression result
// (fourth argument true) instead of a bare index.
writeDump(var=reFindNoCase("surprised|laugh|sad|celebrate|heart|like|thumbs", htmlText, 1, true));

// Index of the "reacted with/to your message" phrase, printed on a new line.
writeOutput("<br/> " & reFindNoCase("reacted (with|to) your message", htmlText));
// Dirty, broken HTML sample
dirtyHtml = "
<html>
<body>
<img src='one.jpg' alt='Image One'>
<img src='two.jpg' alt='Image Two'>
</body>
</html>
";

// Step 1: Create a TagSoup parser.
// The TagSoup JAR is a third-party library and must be on the classpath.
// init() instantiates the parser explicitly so a real object (not an
// un-instantiated class proxy) is handed to the Java APIs below.
parser = createObject("java", "org.ccil.cowan.tagsoup.Parser").init();

// Step 2: Create an InputSource from the HTML string.
// The bytes are produced as UTF-8, so the InputSource is told to decode them
// as UTF-8 as well instead of leaving the choice to the parser.
byteStream = createObject("java", "java.io.ByteArrayInputStream").init(dirtyHtml.getBytes("UTF-8"));
inputSource = createObject("java", "org.xml.sax.InputSource").init(byteStream);
inputSource.setEncoding("UTF-8");

// Step 3: Transform dirty HTML to DOM using SAX and an identity Transformer
saxSource = createObject("java", "javax.xml.transform.sax.SAXSource").init(parser, inputSource);
transformerFactory = createObject("java", "javax.xml.transform.TransformerFactory").newInstance();
transformer = transformerFactory.newTransformer();
domResult = createObject("java", "javax.xml.transform.dom.DOMResult").init();
transformer.transform(saxSource, domResult);

// Step 4: Get the DOM document node
xmlDoc = domResult.getNode();

// Step 5: Search for <img> nodes.
// TagSoup reports elements in the XHTML namespace by default, and a plain
// "//img" only matches elements that have no namespace. Matching on
// local-name() finds the elements whether or not they are namespaced.
imgNodes = xmlSearch(xmlDoc, "//*[local-name()='img']");

// Step 6: Build array of <img> HTML strings
imgTags = [];
for (imgNode in imgNodes) {
    tagString = "<img";
    attrs = imgNode.getAttributes();
    for (i = 0; i < attrs.getLength(); i++) {
        attr = attrs.item(i);
        // xmlFormat() escapes quotes, ampersands and angle brackets so a value
        // containing an apostrophe cannot terminate the single-quoted attribute.
        tagString &= " " & attr.getNodeName() & "='" & xmlFormat(attr.getNodeValue()) & "'";
    }
    tagString &= ">";
    arrayAppend(imgTags, tagString);
}

// Step 7: Dump the array
writeDump(var=imgTags, label="Extracted <img> Tags");
</cfscript>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment