Skip to content

Instantly share code, notes, and snippets.

@mkaatman
Created March 14, 2012 21:42
Show Gist options
  • Save mkaatman/2039744 to your computer and use it in GitHub Desktop.
Save mkaatman/2039744 to your computer and use it in GitHub Desktop.
convert dita general task to strict task
<?php
/**
 * Convert a DITA General Task to a Strict Task
 * Backup your files
 * Carefully review what this script is doing
 * Expect manual post processing cleanup
 * Be aware that it does change some IDs
 *
 * Every *.xml / *.dita file in $dir_read that mentions generalTask.dtd and
 * parses as XML is rewritten and saved under the same base name in
 * $dir_write. The rewrite is regex based and is applied to the whole
 * document, so lists outside the steps are converted as well.
 */
// Directory to read files from
$dir_read = "./user";
// Directory to write files to
$dir_write = "./new";

// Create the output directory if it doesn't exist
if( ! is_dir( $dir_write ) ) {
    if( ! mkdir( $dir_write, 0775, true ) ) {
        echo 'Cannot create '.$dir_write;
        exit( 1 );
    }
}

// Read every file from $dir_read; glob() returns false on error
$dita_files = glob( $dir_read.'/{*.xml,*.dita}', GLOB_BRACE );
if( $dita_files === false ) {
    $dita_files = array( );
}

foreach( $dita_files as $dita ) {
    // Read contents into string
    $contents = file_get_contents( $dita );
    if( $contents === false ) {
        echo 'Cannot read '.$dita."\n";
        continue;
    }

    // Is the file a generalTask?
    if( strpos( $contents, 'generalTask.dtd' ) === false ) {
        continue;
    }

    // Is the file valid XML? isXML() returns true or an error message string,
    // so the result has to be compared strictly; a message is truthy.
    $valid = isXML( $contents );
    if( $valid !== true ) {
        echo 'Skipping '.$dita.': '.$valid."\n";
        continue;
    }

    echo $dita."\n";

    $converted = convertGeneralTask( $contents );
    if( $converted === null ) {
        // Do not write anything rather than writing an empty file
        echo 'Cannot convert '.$dita."\n";
        continue;
    }

    $target = $dir_write.DIRECTORY_SEPARATOR.basename( $dita );
    if( file_put_contents( $target, $converted ) === false ) {
        echo 'Cannot write '.$target."\n";
    }
}

/**
 * Rewrite the markup of a General Task document as a Strict Task.
 *
 * @param string $contents Source of the General Task document
 * @return string|null Converted source, or null when a regular expression
 *                     replacement failed
 */
function convertGeneralTask( $contents ) {
    $contents = str_replace( '<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA General Task//EN" "generalTask.dtd">', '<!DOCTYPE task PUBLIC "-//OASIS//DTD DITA Task//EN" "task.dtd">', $contents );
    $contents = str_replace( 'steps-informal', 'steps-unordered', $contents );
    $contents = str_replace( '</li>', '</cmd></step>', $contents );

    // An ordered list anywhere in the document selects ordered steps
    if( preg_match( '/<ol(\s[^>]*)?>/', $contents ) === 1 ) {
        $contents = str_replace( 'steps-unordered', 'steps', $contents );
    }

    // Applied in order. Tag names are followed by whitespace or ">" so that
    // only the named element matches (e.g. "<li" must not match "<link>").
    $rewrites = array(
        // List items become steps, keeping their attributes
        '/<li(\s[^>]*)?>/' => '<step${1}><cmd>',
        // The list wrappers themselves are dropped
        '/<\/?[uo]l(\s[^>]*)?>/' => '',
        // Pull non-steps out and stick them in context before the first step.
        // The element name is captured whole so that steps-unordered is
        // written back unchanged.
        '/<(steps(?:-unordered)?)(\s[^>]*)?>(.*?)<step(?=[\s>])/s' => '<context>${3}</context><${1}${2}><step',
        // Pull figures out of cmd, then stick them after cmd inside stepxmp.
        // The lookahead keeps the first group inside a single cmd.
        '/<cmd(?=[\s>])((?:(?!<\/cmd>).)*?)<fig(?=[\s>])(.*?)<\/cmd>/s' => '<cmd${1}</cmd><stepxmp><fig${2}</stepxmp>',
        // Pull notes out of cmd, then stick them after cmd inside info which
        // is inside the step. Whatever already followed cmd is kept after info.
        '/<cmd(?=[\s>])((?:(?!<\/cmd>).)*?)<note(?=[\s>])(.*?)<\/cmd>(.*?)<\/step>/s' => '<cmd${1}</cmd><info><note${2}</info>${3}</step>',
    );

    foreach( $rewrites as $pattern => $replacement ) {
        $contents = preg_replace( $pattern, $replacement, $contents );
        // preg_replace() returns null when the PCRE engine fails
        if( $contents === null ) {
            return null;
        }
    }

    return $contents;
}
/**
 * Check whether a string parses as XML.
 *
 * The result is either boolean true or a non-empty message string, so
 * callers must compare against true with === rather than rely on truthiness.
 * libxml internal error handling is switched on and left on.
 *
 * @param string $xml XML source to check
 * @return bool|string true when libxml reports no fatal error, otherwise a
 *                     message describing the first fatal error
 */
function isXML( $xml ) {
    // DOMDocument::loadXML() rejects an empty source outright
    if( $xml === '' ) {
        return 'Document is empty';
    }

    libxml_use_internal_errors( true );
    // The error buffer persists between calls; start from a clean slate
    libxml_clear_errors( );

    $doc = new DOMDocument( '1.0', 'utf-8' );
    $doc -> loadXML( $xml );

    $errors = libxml_get_errors( );
    libxml_clear_errors( );

    foreach( $errors as $error ) {
        // Warnings and recoverable errors do not make the document invalid
        if( $error -> level < LIBXML_ERR_FATAL ) {
            continue;
        }

        // Quote the offending source line; libxml line numbers are 1-based
        $lines = preg_split( '/\r\n|\r|\n/', $xml );
        $index = ( $error -> line ) - 1;
        $badxml = isset( $lines [ $index ] ) ? $lines [ $index ] : '';

        return $error -> message.' at line '.$error -> line.'. Bad XML: '.htmlentities( $badxml );
    }

    return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment