Last active
January 9, 2024 21:20
-
-
Save Tim-Otte/2343e1ababd813b933134525219d28ae to your computer and use it in GitHub Desktop.
With this script you can convert a string containing HTML tags into an array of tags, each with its tag name, attributes, inner text and child tags.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Regular expression that matches a single HTML element.
 *
 * Capture groups:
 *   1 - tag name (letters, optionally followed by letters/digits, e.g. "h1")
 *   2 - the complete attribute run without leading whitespace, e.g.
 *       `type="text" required`; empty/absent when the tag has no attributes
 *   3 - inner HTML for `<x>...</x>` elements; absent for self-closing tags
 *
 * An attribute is a name optionally followed by `=` and a double-quoted,
 * single-quoted or unquoted value, so valueless attributes such as `required`
 * are accepted.
 *
 * Known limitations (regular expressions cannot parse HTML in general):
 *   - `.` does not match newlines, so an element must open and close on one line.
 *   - The inner content is matched greedily: same-name siblings on one line
 *     (`<b>a</b><b>c</b>`) are seen as ONE element.
 *   - Tag-name matching is case-sensitive between opening and closing tag.
 */
define('HTML_REGEX_PATTERN', '/<([a-zA-Z][a-zA-Z0-9]*)(?:\s+([^\s"\'=<>\/]+(?:=(?:"[^"]*"|\'[^\']*\'|[^\s"\'=<>]+))?(?:\s+[^\s"\'=<>\/]+(?:=(?:"[^"]*"|\'[^\']*\'|[^\s"\'=<>]+))?)*))?\s*(?:\/>|>(.*)<\/\1>)/');
/**
 * Test whether the given text contains an HTML element.
 *
 * The pattern is not anchored, so the element may appear anywhere in the
 * text; surrounding plain text does not prevent a match.
 *
 * @param string $text Text to inspect.
 * @return int|false 1 when an element is found, 0 when not, false on a regex error.
 */
function isHtmlTag($text) {
    return preg_match(HTML_REGEX_PATTERN, (string) $text);
}
/**
 * Count the HTML elements found at the top level of the given text.
 *
 * The result is usable as a boolean ("does the text contain any tag?")
 * because zero matches yields 0.
 *
 * @param string $text Text to inspect.
 * @return int|false Number of non-overlapping matches, or false on a regex error.
 */
function hasHtmlTag($text) {
    return preg_match_all(HTML_REGEX_PATTERN, (string) $text);
}
/**
 * Convert an HTML string into a list of nodes.
 *
 * Each node is either a text node, `['text' => string]`, or an element node:
 *   [
 *     'name'       => string,      // tag name
 *     'attributes' => array,       // see parseHtmlAttributes()
 *     'innerText'  => string,      // inner HTML with all tags stripped
 *     'children'   => array|null,  // nodes of the inner HTML, null if it has no tags
 *   ]
 *
 * Elements are recognised with HTML_REGEX_PATTERN, so that pattern's
 * limitations apply here as well.
 *
 * @param string $html HTML fragment to convert.
 * @return array List of nodes in document order (empty for empty input).
 */
function getDataFromHtml($html) {
    $nodes = [];
    $remaining = (string) $html;

    while ($remaining !== '') {
        // Only the first element is consumed per iteration, so one match suffices.
        if (preg_match(HTML_REGEX_PATTERN, $remaining, $match) !== 1) {
            // No (further) element: whatever is left is plain text.
            $nodes[] = ['text' => $remaining];
            break;
        }

        $tagHtml = $match[0];
        // preg_match() omits trailing groups that did not take part in the match
        // (e.g. no inner HTML for self-closing tags), so default them explicitly.
        $attributeString = isset($match[2]) ? $match[2] : '';
        $innerHtml = isset($match[3]) ? $match[3] : '';

        // Text preceding the element becomes its own text node.
        $tagOffset = (int) strpos($remaining, $tagHtml);
        if ($tagOffset > 0) {
            $nodes[] = ['text' => substr($remaining, 0, $tagOffset)];
        }

        $nodes[] = [
            'name' => $match[1],
            'attributes' => parseHtmlAttributes($attributeString),
            'innerText' => strip_tags($innerHtml),
            'children' => preg_match(HTML_REGEX_PATTERN, $innerHtml) === 1
                ? getDataFromHtml($innerHtml)
                : null,
        ];

        // Continue behind the element that was just processed.
        $remaining = (string) substr($remaining, $tagOffset + strlen($tagHtml));
    }

    return $nodes;
}

/**
 * Parse the attribute run of an opening tag into an associative array.
 *
 * Valueless attributes map to true, the `style` attribute maps to an array of
 * CSS declarations, every other attribute maps to its (trimmed) string value.
 * Quoted values are read as a whole, so they may contain spaces and `=`.
 *
 * @param string $attributeString e.g. `class="a b" style="color: red" required`
 * @return array Attribute name => true|string|array
 */
function parseHtmlAttributes($attributeString) {
    $attributes = [];
    $pattern = '/([^\s"\'=<>\/]+)(?:=(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'=<>]+)))?/';

    if (!preg_match_all($pattern, (string) $attributeString, $found, PREG_SET_ORDER)) {
        return $attributes;
    }

    foreach ($found as $attribute) {
        $name = $attribute[1];

        // The whole match equals the name when no "=value" part followed it.
        if ($attribute[0] === $name) {
            $attributes[$name] = true;
            continue;
        }

        $value = trim(isset($attribute[2]) ? $attribute[2] : '');
        $attributes[$name] = strtolower($name) === 'style'
            ? parseStyleDeclarations($value)
            : $value;
    }

    return $attributes;
}

/**
 * Split an inline style value into property => value pairs.
 *
 * Empty declarations (e.g. from a trailing `;`) are skipped and only the
 * first `:` separates property from value, so values such as
 * `url(http://example.com/x.png)` stay intact.
 *
 * @param string $styleValue e.g. `color: red; font-size: 12px;`
 * @return array CSS property => value
 */
function parseStyleDeclarations($styleValue) {
    $styles = [];

    foreach (explode(';', (string) $styleValue) as $declaration) {
        $parts = explode(':', $declaration, 2);
        $property = trim($parts[0]);
        if ($property === '') {
            continue;
        }
        $styles[$property] = isset($parts[1]) ? trim($parts[1]) : '';
    }

    return $styles;
}
@sewerp I think the best way to do this would be to replace the desired elements in the array, then traverse the array recursively and build an HTML string from the information.
/* Your code here ... */
// Turn the node array back into an HTML string. The function returns the
// string rather than printing it, so assign or echo the result as needed.
// NOTE(review): $tagsWithReplacedText is presumably the (modified) output of
// getDataFromHtml() - confirm against the surrounding code.
createHtmlFromArray($tagsWithReplacedText);
/**
 * Build an HTML string from the node structure produced by getDataFromHtml().
 *
 * Accepts either a single node or a list of nodes:
 *   - text node    `['text' => ...]`  -> the text itself
 *   - element node `['name' => ...]`  -> `<name attrs>content</name>`, where the
 *     content is the rendered children, or `innerText` when there are none
 *   - anything else is treated as a list of nodes and rendered in order
 *
 * Attribute values are written back verbatim (no escaping) so that text taken
 * from parsed HTML round-trips unchanged; escape values yourself before
 * inserting untrusted data into the array.
 *
 * @param array $arr Node or list of nodes.
 * @return string Generated HTML ('' for non-array input).
 */
function createHtmlFromArray($arr) {
    if (!is_array($arr)) {
        return '';
    }

    if (array_key_exists('text', $arr)) {
        return (string) $arr['text'];
    }

    if (!array_key_exists('name', $arr)) {
        // Neither a text nor an element node: render every entry of the list.
        return implode('', array_map('createHtmlFromArray', $arr));
    }

    $name = $arr['name'];

    $attributes = '';
    if (isset($arr['attributes']) && is_array($arr['attributes'])) {
        foreach ($arr['attributes'] as $attrName => $attrValue) {
            if ($attrValue === true) {
                // Valueless attribute such as "required".
                $attributes .= " {$attrName}";
                continue;
            }
            if (is_array($attrValue)) {
                // Style declarations are stored as property => value pairs.
                $declarations = [];
                foreach ($attrValue as $property => $value) {
                    $declarations[] = "{$property}: {$value}";
                }
                $attrValue = implode('; ', $declarations);
            }
            $attributes .= " {$attrName}=\"{$attrValue}\"";
        }
    }

    if (isset($arr['children']) && is_array($arr['children'])) {
        $content = createHtmlFromArray($arr['children']);
    } else {
        // Elements without child tags keep their text in 'innerText'.
        $content = isset($arr['innerText']) ? (string) $arr['innerText'] : '';
    }

    // Void elements must not get a closing tag.
    $voidElements = [
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
        'input', 'link', 'meta', 'source', 'track', 'wbr',
    ];
    if ($content === '' && in_array(strtolower($name), $voidElements, true)) {
        return "<{$name}{$attributes} />";
    }

    return "<{$name}{$attributes}>{$content}</{$name}>";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Is there any chance of a script that does the opposite of what this script does? That is, do what this script does, then replace the texts and join everything back together with the replaced texts?