Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert all X/HTML files in a folder to valid X/HTML using libtidy via PHP
<?php
// File extension (without the leading dot) of the documents tidyDir() processes.
// Only defined when the including script has not already chosen a value.
if (!defined('TIDYDIR_EXTENSION')) {
    define('TIDYDIR_EXTENSION', 'html');
}
/**
 * Recursively repairs every "*.<TIDYDIR_EXTENSION>" file below a directory with libtidy.
 *
 * Each matching file is parsed with tidy. When tidy reports warnings or errors
 * the document is cleaned and written back in place, with the original document
 * prolog (optional XML declaration and/or DOCTYPE) re-attached in front of the
 * repaired <html> element. Files tidy has no complaints about are left untouched.
 * Progress is reported on standard output. After the files of a directory have
 * been handled, every sub-directory whose name does not start with a dot is
 * processed the same way.
 *
 * The script terminates (die) as soon as tidy fails to clean a document.
 *
 * @param string $directory Path of the directory to process, without trailing separator.
 *
 * @return void
 */
function tidyDir($directory) {
    $htmlFiles = glob($directory.DIRECTORY_SEPARATOR.'*.'.TIDYDIR_EXTENSION);
    if (false === $htmlFiles) {
        // glob() signals an error with false; treat it as "nothing to process"
        // so that the sub-directories below are still visited.
        $htmlFiles = array();
    }

    // Matches only a real document prolog at the very start of the file:
    // an optional XML declaration followed by a DOCTYPE, or an XML declaration
    // on its own. Matching "any first tag" would duplicate e.g. <html> in the
    // output of files that have no DOCTYPE at all.
    $prologRegEx = '#\A\s*((?:<\?xml\b[^>]*\?>\s*)?<!DOCTYPE\b[^>]*>|<\?xml\b[^>]*\?>)#i';

    $htmlTidy = new tidy();
    foreach ($htmlFiles as $entry) {
        $htmlContents = file_get_contents($entry);
        if (false === $htmlContents) {
            // Unreadable entry (permissions, or a directory named like a document).
            echo 'failed reading ',$entry,"\n";
            continue;
        }

        // tidy's html() node does not include the prolog, so keep it to re-attach later.
        $doctype = preg_match($prologRegEx, $htmlContents, $prolog)
            ? $prolog[1]."\n"
            : '';

        $htmlTidy->parseString($htmlContents);
        // getStatus(): 0 = clean, 1 = warnings, 2 = errors.
        if (0 < $htmlTidy->getStatus()) {
            if (!$htmlTidy->cleanRepair()) {
                echo 'FAILED TO CLEAN UP ',$entry,"\n";
                die;
            }
            $correctedHTML = $doctype.$htmlTidy->html()->value;
            echo 'saving ',$entry,"\n";
            // Compare against false explicitly: a return value of 0 bytes is not an error.
            if (false === file_put_contents($entry, $correctedHTML)) {
                echo 'failed saving ',$entry,"\n";
            }
        } else {
            echo 'Goody, ',$entry,' is valid html ',"\n";
        }
    }

    $d = dir($directory);
    if (!($d instanceof Directory)) {
        // Directory could not be opened; nothing to recurse into.
        return;
    }
    // Collect the sub-directories first so the handle can be released before
    // recursing; otherwise one handle per nesting level stays open.
    $subDirectories = array();
    while (false !== ($entry = $d->read())) {
        // Skips ".", ".." and hidden entries such as ".git".
        if (0 !== strpos($entry, '.') && is_dir($directory.DIRECTORY_SEPARATOR.$entry)) {
            $subDirectories[] = $directory.DIRECTORY_SEPARATOR.$entry;
        }
    }
    $d->close();

    foreach ($subDirectories as $subDirectory) {
        echo 'calling tidyDir on ',$subDirectory,"\n";
        tidyDir($subDirectory);
    }
}
// Entry point: process the directory this script lives in, including its sub-directories.
tidyDir(__DIR__);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.