Skip to content

Instantly share code, notes, and snippets.

@nickdunn
Created March 3, 2011 11:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickdunn/852641 to your computer and use it in GitHub Desktop.
Save nickdunn/852641 to your computer and use it in GitHub Desktop.
<?php
/**
 * Separates multiple tags that share one line so each tag starts its own line.
 *
 * The input is processed line by line: blank or whitespace-only lines are
 * dropped, and wherever a ">" is followed (optionally after whitespace) by a
 * "<" on the same line, that gap is replaced with a single newline.
 *
 * @param string $fixthistext Markup to split.
 * @return string The markup with one newline between adjacent tags; an empty
 *                string when the input contains no non-blank lines.
 */
function fix_newlines_for_clean_html($fixthistext)
{
    // Start from an empty list: when every input line is blank nothing is
    // appended below, and implode() must still receive an array.
    $fixedtext_array = array();

    foreach (explode("\n", (string) $fixthistext) as $unfixedtextvalue) {
        // Makes sure empty lines are ignored
        if (preg_match('/^\s*$/', $unfixedtextvalue)) {
            continue;
        }

        // Only whitespace can sit between ">" and "<" here, so a plain
        // greedy \s* matches exactly the gap between two adjacent tags.
        $fixedtext_array[] = preg_replace('/>\s*</', ">\n<", $unfixedtextvalue);
    }

    return implode("\n", $fixedtext_array);
}
/**
 * Re-indents markup so that nesting depth is reflected by leading whitespace.
 *
 * The input is first split to one tag per line by
 * fix_newlines_for_clean_html(). Each line then has its existing indentation
 * removed and is classified with regular expressions (this is a heuristic,
 * not an HTML parser):
 *
 *  - self-closing tag, "<!...>" declaration/comment, or a line holding both
 *    an opening and a closing tag: indented at the current level;
 *  - closing tag, or a line consisting only of "}": level decreased first;
 *  - opening tag that is not a void element, or a line consisting only of
 *    "{": indented at the current level, then level increased;
 *  - anything else: indented at the current level.
 *
 * A line that is exactly "</textarea>" or "</script>" is appended to the
 * previous output line instead of getting its own line.
 *
 * @param string $uncleanhtml Markup to re-indent.
 * @return string The re-indented markup, lines joined with "\n".
 */
function clean_html_code($uncleanhtml)
{
    //Set wanted indentation
    $indent = " ";

    // Void elements never get a closing tag, so they must not open a new
    // indent level. The lookahead stops "<col" from matching "<colgroup>".
    $void_tag_pattern = '/<(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s\/>])/i';

    //Uses previous function to separate tags
    $uncleanhtml_array = explode("\n", fix_newlines_for_clean_html($uncleanhtml));

    $cleanhtml_array = array();
    //Sets no indentation
    $indentlevel = 0;

    foreach ($uncleanhtml_array as $uncleanhtml_key => $currentuncleanhtml) {
        //Removes all indentation: every tab in the line, then leading whitespace
        $currentuncleanhtml = preg_replace('/\t+/', '', $currentuncleanhtml);
        $currentuncleanhtml = preg_replace('/^\s+/', '', $currentuncleanhtml);

        $is_self_closing = preg_match('/<(.+)\/>/', $currentuncleanhtml);
        $is_declaration  = preg_match('/<!(.*)>/', $currentuncleanhtml);
        $has_opening_tag = preg_match('/<[^\/](.*)>/', $currentuncleanhtml);
        $has_closing_tag = preg_match('/<\/(.*)>/', $currentuncleanhtml);
        $is_close_brace  = preg_match('/^\s*\}\s*$/', $currentuncleanhtml);
        $is_open_brace   = preg_match('/^\s*\{\s*$/', $currentuncleanhtml);
        $is_void_tag     = preg_match($void_tag_pattern, $currentuncleanhtml);

        if ($is_self_closing || $is_declaration || ($has_opening_tag && $has_closing_tag)) {
            //Nesting depth is unchanged by this line, simply apply indent
            $cleanhtml_array[$uncleanhtml_key] = str_repeat($indent, $indentlevel) . $currentuncleanhtml;
        } elseif ($has_closing_tag || $is_close_brace) {
            //Closing HTML tag or closing JavaScript brace: decrease indentation, then apply the new level.
            //Clamp at zero so an unbalanced closing tag cannot push later lines to a negative level.
            $indentlevel = max(0, $indentlevel - 1);

            // fix for textarea whitespace and nicer looking script tags:
            // glue the closing tag onto the previous output line, but only
            // when that line still exists (it does not for the very first
            // line, or when it was already merged away).
            $previous_key = $uncleanhtml_key - 1;
            $glue_to_previous = ($currentuncleanhtml === '</textarea>' || $currentuncleanhtml === '</script>')
                && isset($cleanhtml_array[$previous_key]);

            if ($glue_to_previous) {
                $cleanhtml_array[$uncleanhtml_key] = $cleanhtml_array[$previous_key] . $currentuncleanhtml;
                unset($cleanhtml_array[$previous_key]);
            } else {
                $cleanhtml_array[$uncleanhtml_key] = str_repeat($indent, $indentlevel) . $currentuncleanhtml;
            }
        } elseif (($has_opening_tag && !$is_void_tag) || $is_open_brace) {
            //Opening HTML tag that is not a void element, or opening JavaScript brace:
            //apply the current level, then increase indentation for the lines that follow
            $cleanhtml_array[$uncleanhtml_key] = str_repeat($indent, $indentlevel) . $currentuncleanhtml;
            $indentlevel++;
        } else {
            //Else, only apply indentation
            $cleanhtml_array[$uncleanhtml_key] = str_repeat($indent, $indentlevel) . $currentuncleanhtml;
        }
    }

    //Return single string separated by newline
    return implode("\n", $cleanhtml_array);
}
/**
 * Extension that rewrites any DOCTYPE declaration in the generated page
 * output to the short HTML5 form "<!DOCTYPE html>".
 *
 * NOTE(review): the Extension base class and the delegate dispatcher are
 * defined outside this file (presumably Symphony CMS) — confirm there.
 */
Class extension_html5_doctype extends Extension{

    /**
     * Describes this extension.
     *
     * @return array Name, description, version, release date and author.
     */
    public function about(){
        return array(
            'name' => 'HTML5 doctype',
            'description' => 'Replace any generated HTML doctype with basic HTML5 doctype',
            'version' => '1.0',
            'release-date' => '2010-07-13',
            'author' => array(
                'name' => 'Nick Dunn'
            )
        );
    }

    /**
     * Lists the delegates this extension subscribes to.
     *
     * @return array One entry: the 'FrontendOutputPostGenerate' delegate on
     *               the '/frontend/' page, handled by parse_html().
     */
    public function getSubscribedDelegates(){
        return array(
            array(
                'page' => '/frontend/',
                'delegate' => 'FrontendOutputPostGenerate',
                'callback' => 'parse_html'
            ),
        );
    }

    // Disabled alternative indenter, kept for reference. It reads $i and $r
    // before assigning them, so it needs fixing before it can be re-enabled.
    /*private function indent($str){
    $str = preg_replace("/\n/", '', $str);
    $ret = "";
    $indent = 0;
    $indentInc = 1;
    $noIndent = false;
    $indent_char = "\t";
    while (($l = strpos($str,"<",$i)) !== false) {
    if($l!=$r && $indent>0) {
    $ret .= "\n" . str_repeat($indent_char,$indent) . substr($str,$r,($l-$r));
    }
    $i = $l+1;
    $r = strpos($str,">",$i)+1;
    $t = substr($str,$l,($r-$l));
    if(strpos($t,"/")==1){
    $indent -= $indentInc;
    $noIndent = true;
    }
    else if(($r-$l-strpos($t,"/"))==2 || substr($t,0,2)=="<?"){ $noIndent = true; }
    if($indent<0){ $indent = 0; }
    if($ret){ $ret .= "\n"; }
    $ret .= str_repeat($indent_char,$indent);
    $ret .= $t;
    if(!$noIndent){ $indent += $indentInc; }
    $noIndent = false;
    }
    //$ret .= "\n";
    return($ret);
    }*/

    /**
     * Delegate callback: replaces every DOCTYPE declaration found in
     * $context['output'] with "<!DOCTYPE html>".
     *
     * NOTE(review): $context is received by value, so the write-back below
     * only reaches the caller if 'output' is a reference element of the
     * array — confirm against the code that fires the delegate.
     *
     * @param array $context Delegate context; its 'output' entry holds the
     *                       generated page markup.
     * @return void
     */
    public function parse_html($context) {
        // The doctype keyword is case-insensitive in HTML and may be followed
        // by any whitespace, so match "<!doctype" and "<!DOCTYPE" alike.
        $html = preg_replace('/<!DOCTYPE\s+[^>]+>/i', '<!DOCTYPE html>', $context['output']);

        //$html = clean_html_code($html);

        // preg_replace() returns null when the regex engine fails; keep the
        // original output in that case rather than blanking the page.
        if ($html !== null) {
            $context['output'] = $html;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment