-
-
Save rr-it/ab6a02a8b0d979b69d7ec687081a2c4e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -ru sourceopt/Classes/Service/CleanHtmlService.php sourceopt_1.0.0.1503498261/Classes/Service/CleanHtmlService.php | |
--- sourceopt/Classes/Service/CleanHtmlService.php 2017-08-23 16:58:07.754294122 +0200 | |
+++ sourceopt_1.0.0.1503498261/Classes/Service/CleanHtmlService.php 2017-08-23 16:57:54.677951125 +0200 | |
@@ -278,7 +278,9 @@ | |
) { | |
$html .= $this->killWhiteSpace($htmlArray[$x]); | |
} else { // remove all line breaks | |
- $html .= $this->killLineBreaks($htmlArray[$x]); | |
+ $temp_html = $htmlArray[$x]; | |
+ $this->removeNewLines($temp_html); | |
+ $html .= $temp_html; | |
} | |
// count up a tab | |
@@ -444,7 +446,7 @@ | |
/** | |
* Remove new lines where unnecessary | |
- * spares line breaks within: pre, textarea, ... | |
+ * spares line breaks within: pre, textarea, input[value] ... | |
* | |
* @param string $html | |
* | |
@@ -456,13 +458,42 @@ | |
'textarea', | |
'pre' | |
]; // eventuell auch: span, script, style | |
- $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); | |
+ $splitArraySingleton = [ | |
+ // singletonTag => ['attributeA', 'attributeB'] | |
+ 'input' => ['value'] | |
+ ]; // other singleton tags: area, base, br, col, command, embed, hr, img, keygen, link, meta, param, source, track, wbr | |
+ | |
+ $pieces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); | |
$html = ""; | |
- for ($i = 0; $i < count($peaces); $i++) { | |
- if (($i + 1) % 3 == 0) { | |
- continue; | |
+ // $pieces[3n+0] - content in front of match (or tail behind last match) | |
+ // $pieces[3n+1] - match: whole tag-element | |
+ // $pieces[3n+2] - tag-name (\2) | |
+ for ($i = 0; $i < count($pieces); $i += 3) { | |
+ | |
+ $piecesSingleton = preg_split('#(<(' . implode('|', array_keys($splitArraySingleton)) . ').*(?:>|/>))#Uis', $pieces[$i + 0], -1, PREG_SPLIT_DELIM_CAPTURE); | |
+ // $piecesSingleton[3n+0] - content in front of match (or tail behind last match) | |
+ // $piecesSingleton[3n+1] - match: whole singleton tag-element | |
+ // $piecesSingleton[3n+2] - tag-name of singleton (\2) | |
+ for ($k = 0; $k < count($piecesSingleton); $k += 3) { | |
+ | |
+ $html .= $this->killLineBreaks($piecesSingleton[$k + 0]); | |
+ | |
+ $piecesAttributes = preg_split('#((?:' . implode('|', $splitArraySingleton[$piecesSingleton[$k + 2]]) . ')=(\'|").*\2)#Uis', $piecesSingleton[$k + 1], -1, PREG_SPLIT_DELIM_CAPTURE); | |
+ // $piecesAttributes[3n+0] - content in front of match (or tail behind last match) | |
+ // $piecesAttributes[3n+1] - match: whole attribute with value | |
+ // $piecesAttributes[3n+2] - quote (single or double) enclosing the value (\2) | |
+ for ($m = 0; $m < count($piecesAttributes); $m += 3) { | |
+ $html .= $this->killLineBreaks($piecesAttributes[$m + 0]); | |
+ $html .= $piecesAttributes[$m + 1]; | |
+ // $piecesAttributes[$m + 2] is only the captured quote character (\2); it is intentionally not appended to $html | |
+ } | |
+ | |
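+ // $piecesSingleton[$k + 2] is the captured tag name (\2); it serves only as the attribute-list lookup key and is intentionally not appended to $html | |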
} | |
- $html .= (($i - 1) % 3 != 0) ? $this->killLineBreaks($peaces[$i]) : $peaces[$i]; | |
+ | |
+ $html .= $pieces[$i + 1]; | |
+ | |
+ // $pieces[$i + 2] is only the captured tag name (\2); it is intentionally not appended to $html | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment