Created
March 14, 2012 21:42
-
-
Save mkaatman/2039744 to your computer and use it in GitHub Desktop.
convert dita general task to strict task
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Convert a DITA General Task to a Strict Task
 * Backup your files
 * Carefully review what this script is doing
 * Expect manual post processing cleanup
 * Be aware that it does change some IDs
 */

// Directory to read files from
$dir_read = "./user";

// Directory to write files to
$dir_write = "./new";

// Create the output directory if it doesn't exist.
// The path passed to mkdir() must be the same one that was tested with
// is_dir(); the third argument lets a nested output path be created too.
if( ! is_dir( $dir_write ) ) {
    if( ! mkdir( $dir_write, 0775, true ) ) {
        echo 'Cannot create '.$dir_write;
        // Non-zero status so calling shells/scripts can detect the failure
        exit( 1 );
    }
}
// Read every .xml / .dita file from $dir_read, convert the ones that are
// DITA General Tasks, and write the result under the same name in $dir_write.
$dita_files = glob( $dir_read.'/{*.xml,*.dita}', GLOB_BRACE );
if( $dita_files === false ) {
    // glob() returns false on error; treat that as "nothing to convert"
    $dita_files = array();
}

foreach( $dita_files as $dita ) {
    // Read contents into string
    $contents = file_get_contents( $dita );
    if( $contents === false ) {
        echo 'Cannot read '.$dita."\n";
        continue;
    }

    // Is the file a generalTask?
    if( ! preg_match( '/generalTask\.dtd/', $contents ) ) {
        continue;
    }

    // Is the file valid XML? isXML() returns true on success and a non-empty
    // error message on failure, so the result must be compared strictly:
    // a plain truthiness test would accept the error message as "valid".
    $xml_check = isXML( $contents );
    if( $xml_check !== true ) {
        echo 'Skipping '.$dita.': '.$xml_check."\n";
        continue;
    }

    echo $dita."\n";

    // Switch the document type from General Task to (strict) Task
    $contents = str_replace( '<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA General Task//EN" "generalTask.dtd">', '<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA Task//EN" "task.dtd">', $contents );
    $contents = str_replace( 'steps-informal', 'steps-unordered', $contents );

    // Turn list items into steps. The \b keeps the pattern from also
    // matching other elements whose names merely start with "li"
    // (e.g. <link>, <lines>, <linktext>).
    $contents = preg_replace( '/<li\b(.*?)>/', '<step ${1}><cmd>', $contents );
    $contents = str_replace( '</li>', '</cmd></step>', $contents );

    // Drop the unordered list wrappers
    $contents = preg_replace( '/<ul\b(.*?)>/', '', $contents );
    $contents = preg_replace( '/<\/ul\b(.*?)>/', '', $contents );

    // Any ordered list in the file makes the whole thing ordered <steps>
    if( preg_match( '/<ol\b(.*?)>/', $contents ) ) {
        $contents = str_replace( 'steps-unordered', 'steps', $contents );
        $contents = preg_replace( '/<ol\b(.*?)>/', '', $contents );
        $contents = preg_replace( '/<\/ol\b(.*?)>/', '', $contents );
    }

    // Pull non-steps out and stick them in context before the first step.
    // ${1} is whatever follows "<steps" in the opening tag: attributes
    // and/or the "-unordered" name suffix. It is re-attached without an
    // extra space so "<steps-unordered>" is not turned into the malformed
    // "<steps -unordered>".
    $contents = preg_replace( '/\<steps(.*?)\>(.*?)\<step/s', '<context>${2}</context><steps${1}><step', $contents );

    // Pull figures out of cmd, then stick them after cmd inside stepxmp
    $contents = preg_replace( '/\<cmd(.*?)\<fig(.*?)\<\/cmd\>/s', '<cmd ${1}</cmd><stepxmp><fig ${2}</stepxmp>', $contents );

    // Pull notes out of cmd, then stick them after step inside info which is inside the step
    $contents = preg_replace( '/\<cmd(.*?)\<note(.*?)\<\/cmd>(.*?)\<\/step\>/s', '<cmd ${1}</cmd><info><note${2}</info></step>', $contents );

    // preg_replace() returns null on a PCRE error (for example when the
    // backtrack limit is hit on a large file); never write out an empty
    // file in that case.
    if( $contents === null || $contents === '' ) {
        echo 'Conversion failed for '.$dita."\n";
        continue;
    }

    $target = $dir_write.DIRECTORY_SEPARATOR.basename( $dita );
    if( file_put_contents( $target, $contents ) === false ) {
        echo 'Cannot write '.$target."\n";
    }
}
/**
 * Check whether a string parses as XML without a fatal libxml error.
 *
 * Warnings and recoverable errors (libxml level below LIBXML_ERR_FATAL)
 * do not make the document invalid.
 *
 * Side effect: libxml internal error handling is switched on and the
 * libxml error buffer is cleared.
 *
 * @param string $xml XML document text.
 * @return bool|string True when no fatal error was reported; otherwise a
 *                     message with the libxml error text, the line number,
 *                     and the HTML-escaped offending source line. Callers
 *                     must compare against true with ===, since the error
 *                     message is a non-empty (truthy) string.
 */
function isXML( $xml ) {
    // An empty document is never valid XML, and DOMDocument::loadXML()
    // rejects an empty string outright on PHP 8.
    if( ! is_string( $xml ) || $xml === '' ) {
        return 'Empty document at line 1. Bad XML: ';
    }

    // Collect parse errors in libxml's buffer instead of raising PHP warnings
    libxml_use_internal_errors( true );
    // The buffer accumulates across calls; start from a clean slate so
    // errors from a previously checked document are not reported for this one.
    libxml_clear_errors( );

    $doc = new DOMDocument( '1.0', 'utf-8' );
    $doc -> loadXML( $xml );

    $errors = libxml_get_errors( );
    libxml_clear_errors( );

    // Look at every reported error, not just the first: a warning may be
    // followed by a fatal error.
    foreach( $errors as $error ) {
        if( $error -> level < LIBXML_ERR_FATAL ) {
            continue;
        }

        // Split on newlines (tolerating CRLF) to find the offending line.
        // NOTE(review): assumes libxml's 1-based line numbers are counted
        // on "\n" — confirm.
        $lines = explode( "\n", $xml );
        $index = $error -> line - 1;
        $badxml = isset( $lines [ $index ] ) ? rtrim( $lines [ $index ], "\r" ) : '';

        return trim( $error -> message ).' at line '.$error -> line.'. Bad XML: '.htmlentities( $badxml );
    }

    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment