Skip to content

Instantly share code, notes, and snippets.

@hampusn
Last active August 29, 2015 14:21
Show Gist options
  • Save hampusn/70cafeb70ec6e83ba845 to your computer and use it in GitHub Desktop.
Save hampusn/70cafeb70ec6e83ba845 to your computer and use it in GitHub Desktop.
PHP CLI script for removing specific XMLNodes from WPML Xliff files.
<?php
/**
* Cleanup Xliffs
*
* This file can be called through the PHP CLI to clean up Xliff files.
*
* The script takes directories and/or Xliff files and removes the XML nodes
* that match the XPath query.
*
* @author Hampus Nordin <hej@hampusnord.in>
* @version 0.1
*/
/**
 * Removes XML nodes matching an XPath query from Xliff files.
 *
 * The instance is given a list of file and/or directory paths plus an
 * optional set of single-letter options. Directories are searched
 * (non-recursively) with glob() using the "p" option as pattern. Every file
 * found is loaded as XML, the nodes matched by the "q" option are removed
 * and the file is written back in place.
 *
 * @package hampusn
 * @author Hampus Nordin <hej@hampusnord.in>
 **/
class Hampusn_Cleanup_Xliffs
{
    /**
     * Paths (directories and/or files) which should be parsed and cleaned.
     *
     * @var array
     **/
    private $arguments = array();

    /**
     * Options which can be used to override the default behaviour.
     *
     * @var array
     **/
    private $options = array(
        // Pattern to match files with in glob()
        'p' => '*.xliff',
        // Namespace prefix to use with XPath query.
        'n' => 'x',
        // XPath query. The matches will be removed.
        'q' => '//x:trans-unit[starts-with(@resname,"field-_") and not(contains(@resname, "yoast"))]',
        // Verbosity: 0 prints errors only, 1 also prints verbose messages.
        'v' => 1,
    );

    /**
     * Will hold all files which should be parsed and cleaned.
     * The array is filled through Hampusn_Cleanup_Xliffs::parseArguments().
     *
     * @var array
     **/
    private $files = array();

    /**
     * The constructor.
     *
     * @param array $arguments Paths to files and/or directories to clean.
     * @param array $options   Option overrides keyed by option letter
     *                         (p, n, q, v). Anything that is not an array
     *                         (e.g. false from a failed getopt()) is ignored.
     * @return void
     **/
    public function __construct($arguments, $options = array())
    {
        $this->arguments = (array) $arguments;

        // array_merge() only accepts arrays; fall back to the defaults
        // instead of failing when the caller passed something else.
        if (is_array($options)) {
            $this->options = array_merge($this->options, $options);
        }
    }

    /**
     * Execute the parsing and cleanup.
     *
     * @return void
     **/
    public function run()
    {
        $this->parseArguments();
        $this->removeDuplicates();
        $this->walkFiles(array($this, 'cleanFile'));
    }

    /**
     * Walk through all arguments and collect the files they refer to.
     *
     * A file argument is added as is, a directory argument contributes the
     * readable files matching the "p" option. Arguments which cannot be
     * resolved by realpath() are skipped silently.
     *
     * TODO: consider refactoring this function and also support recursive walk.
     *
     * @return void
     **/
    private function parseArguments()
    {
        foreach ($this->arguments as $arg) {
            // realpath() allows relative paths such as "./file.xliff" and
            // returns false for paths which do not exist.
            $realPath = realpath($arg);
            if (!$realPath) {
                continue;
            }

            if (!is_readable($realPath)) {
                $this->log("Path not readable: $realPath", 0);
                continue;
            }

            if (is_file($realPath)) {
                $this->files[] = $realPath;
                continue;
            }

            if (!is_dir($realPath)) {
                continue;
            }

            // Make sure the directory ends with exactly one separator
            // before the glob pattern is appended.
            $directory = rtrim($realPath, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
            $matches = glob($directory . $this->options['p']);

            // glob() returns false on error; there is nothing to iterate then.
            if ($matches === false) {
                continue;
            }

            foreach ($matches as $match) {
                if (is_file($match) && is_readable($match)) {
                    $this->files[] = $match;
                }
            }
        }
    }

    /**
     * Remove any duplicates from the files array.
     *
     * @return void
     **/
    private function removeDuplicates()
    {
        $this->files = array_unique($this->files);
    }

    /**
     * Walk through all files with passed function.
     *
     * @param callable $func Called once per file with the file path as its
     *                       only parameter.
     * @return void
     **/
    private function walkFiles($func)
    {
        foreach ($this->files as $file) {
            call_user_func($func, $file);
        }
    }

    /**
     * Cleans the file by removing the XML nodes matched by the "q" option.
     *
     * The file is only written back when at least one node was removed.
     * Files which are not writable, cannot be parsed as XML or for which the
     * XPath query cannot be evaluated are reported as errors and left
     * untouched.
     *
     * @param string $file Path of the file to clean.
     * @return void
     **/
    private function cleanFile($file)
    {
        // Bail early if file is not writable.
        if (!is_writable($file)) {
            $this->log("File not writable: $file", 0);
            return;
        }

        // Collect libxml parse errors internally instead of emitting PHP
        // warnings; the failure is reported through log() below.
        $previous = libxml_use_internal_errors(true);
        $document = new DOMDocument;
        $loaded = $document->load($file);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!$loaded || $document->documentElement === null) {
            $this->log("Could not parse file as XML: $file", 0);
            return;
        }

        // Bind the configured prefix to the namespace of the root element so
        // that the query can address namespaced elements.
        $xpath = new DOMXpath($document);
        $xpath->registerNamespace(
            $this->options['n'],
            (string) $document->documentElement->namespaceURI
        );

        // query() returns false when the expression cannot be evaluated.
        $query = $xpath->query($this->options['q']);
        if ($query === false) {
            $this->log("Could not evaluate XPath query for file: $file", 0);
            return;
        }

        if (!$query->length) {
            // Nothing to remove, so the file is left exactly as it was.
            $this->log("Empty query for file: $file", 1);
            return;
        }

        $numRemoved = 0;
        foreach ($query as $node) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
                $numRemoved++;
            }
        }

        if ($document->save($file) === false) {
            $this->log("Could not write file: $file", 0);
            return;
        }

        $this->log("Removed $numRemoved XMLNodes from file: $file", 1);
    }

    /**
     * Echo to cli console.
     *
     * Levels:
     * 0 Errors - Messages printed when something goes wrong.
     * 1 Verbose - Messages printed which might help debugging.
     *
     * A message is printed when its level does not exceed the "v" option,
     * so errors are always shown for any non-negative verbosity.
     *
     * @param string $message Text to print.
     * @param int    $level   Level of the message, see above.
     * @return void
     **/
    private function log($message, $level = 0)
    {
        // The option arrives as a string when it comes from getopt().
        if ($level <= (int) $this->options['v']) {
            echo $message . PHP_EOL;
        }
    }
} // END class Hampusn_Cleanup_Xliffs
// $argv should only be populated if the script is run through the cli.
// The first value in $argv is the file being executed, so there is only
// something to do when the arguments list is longer than 1.
if (!empty($argv) && count($argv) > 1) {
    // Parse the supported options. $restIndex receives the index in the
    // argument list where option parsing stopped (requires PHP 7.1+).
    $restIndex = 1;
    $options = getopt('p:n:q:v:', array(), $restIndex);

    // getopt() returns false on failure; fall back to the defaults then.
    if (!is_array($options)) {
        $options = array();
    }

    // Only what follows the options is treated as a path. This keeps option
    // flags and their values (e.g. the "x" of "-n x") from being resolved
    // as files or directories to clean.
    $paths = array_slice($argv, $restIndex);

    // Init
    $c = new Hampusn_Cleanup_Xliffs($paths, $options);
    // Execute
    $c->run();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment