Skip to content

Instantly share code, notes, and snippets.

@joncutrer
Created December 15, 2018 16:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joncutrer/4a9e08707938e0970d4300c4f336c45a to your computer and use it in GitHub Desktop.
Save joncutrer/4a9e08707938e0970d4300c4f336c45a to your computer and use it in GitHub Desktop.
<?php
/**
* This is NOT WORKING CODE but is provided for reference. This is the portion of
* code that was used on mwcsvimport.pronique.com to take a csv dataset and template
* file and generate a mediawiki xml import file.
*
* mwcsvimport.pronique.com was shut down on 12/15/2018, but this code
* is provided for inspection by anyone looking to recreate its
* functionality.
*
* - joncutrer
*
*/
class MWCSVImportMergeClass {
/**
 * Merge a dataset with its template and store the resulting XML as a merge file.
 *
 * Reads the dataset, records the activity, generates the XML through
 * __mwmerge(), saves it via Dataset->MergeFile and redirects to the
 * dataset view.
 *
 * @param mixed $id Identifier handed to Dataset->read()
 * @return void
 */
function doMerge( $id ) {
    $dataset = $this->Dataset->read(null, $id);
    if ( !$dataset ) {
        $this->Session->setFlash(__('Invalid dataset', true));
        $this->redirect(array('action' => 'index'));
        //Never continue with a missing dataset, even if redirect() returns
        return;
    }

    //Record Activity
    $this->recordActivity( 'XML Merge on \'' . $dataset['Dataset']['name'] . '\' ' . $dataset['Dataset']['id'] . ' ' );

    $merge_file = array();
    $merge_file['data'] = $this->__mwmerge( $dataset['Dataset']['fields'], $dataset['Dataset']['data'], $dataset['Template'] );
    if ( !$merge_file['data'] ) {
        //Best effort: the failure is logged and the (empty) merge file is still saved
        $this->log( 'id:' . $id . ' Error During Merge, No XML Generated', 'debug' );
    }

    $merge_file['dataset_id'] = $dataset['Dataset']['id'];
    $merge_file['name'] = $dataset['Dataset']['name'] . '.xml';
    //strlen() counts bytes, which is what a file size should be
    $merge_file['size'] = strlen( $merge_file['data'] );
    $merge_file['user_id'] = $this->Auth->user('id');

    if ( !$this->Dataset->MergeFile->save( $merge_file ) ) {
        $this->log( 'id: ' . $id . ' Error Saving Merge File', 'error' );
    }

    $this->redirect(array('controller'=>'datasets','action'=>'view', $id ));
    //Only reached when redirect() returns instead of ending the request
    echo "XML File Built";
    exit;
}
/****************************************************************************************
* Private Methods below
* **************************************************************************************/
/**
 * Compile every data row against the template and return the XML
 * produced by __buildXML().
 *
 * Accounts on the 'free' plan are limited to the first
 * App.Accounts.Free.ExportLimit rows.
 *
 * @param array $fields Field names available as template variables
 * @param array $data Data rows, each passed to __compileTemplate()
 * @param array $template Template with 'title' and 'body' entries
 * @return string XML returned by __buildXML()
 */
function __mwmerge( $fields, $data, $template ) {
    $this->log( 'Merging ' . count( $data) . ' pages', 'debug' );
    if ( $this->Auth->user('plan') == 'free' ) {
        $data = array_slice( $data, 0, Configure::read('App.Accounts.Free.ExportLimit') );
    }

    //Start from an empty list so a dataset without rows still yields valid XML
    $pages = array();
    foreach ( $data as $key=>$row ) {
        //Old Method of Compiling Templates
        //$pages[$key] = $this->__buildPage( $fields, $row, $template );
        //New Methods of Compiling Templates
        $pages[$key] = $this->__compileTemplate( $fields, $row, $template );
    }
    return $this->__buildXML( $pages );
}
/**
 * Old template compiler: replace ${field} and {$field} placeholders in the
 * template title and body with the trimmed values of one data row.
 *
 * @param array $fields Field names to substitute
 * @param array $data One data row keyed by field name
 * @param array $template Template with 'title' and 'body' entries
 * @return array Page array with 'title' (sanitized and trimmed) and 'body'
 */
function __buildPage( $fields, $data, $template ) {
    $replacements = array();
    foreach ( $fields as $field ) {
        //A row that lacks a field is treated as an empty value
        $value = isset( $data[$field] ) ? trim( $data[$field] ) : '';
        //Deprecated ${field} syntax
        $replacements['${' . $field . '}'] = $value;
        $replacements['{$' . $field . '}'] = $value;
    }

    //strtr() substitutes in a single pass, so placeholder-looking text inside
    //one field's value is never expanded again by another field
    $page = array();
    $page['title'] = trim( $this->__sanatizeTitle( strtr( $template['title'], $replacements ) ) );
    $page['body'] = strtr( $template['body'], $replacements );
    return $page;
}
/**
 * Compile one page: replace {$field} placeholders in the template title and
 * body, then apply modifier tags found in the body.
 *
 * A modifier tag has the form {name args}content{/name}. The content, followed
 * by the raw args string when one is present, is passed to the callable that
 * $modifier_map registers for that name. Tags with unknown names are left
 * untouched. Only the body is scanned for modifier tags.
 *
 * @param array $fields Field names to substitute
 * @param array $data One data row keyed by field name
 * @param array $template Template with 'title' and 'body' entries
 * @return array Page array with 'title' (sanitized and trimmed) and 'body'
 */
function __compileTemplate( $fields, $data, $template ) {
    //Mapping of modifiers to functions
    $modifier_map = array(
        //String modifiers
        'upper'=>'strtoupper',
        'lower'=>'strtolower',
        'upperfirst'=>'ucfirst',
        'camelize'=>array('Inflector', 'camelize'),
        'capitalize'=>'ucwords',
        'sha1'=>'sha1',
        'md5'=>'md5',
        'trim'=>'trim',
        'rtrim'=>'rtrim',
        'ltrim'=>'ltrim',
        'reverse'=>'strrev',
        'strip_tags'=>'strip_tags',
        //Number modifiers
        'number_format'=>array( $this, '__numberFormatModifier' ),
        //'money_format'=>array( $this, '__moneyFormatModifier'),
        //NOTE(review): relies on a framework class named String; confirm it exists in the target CakePHP/PHP version
        'uuid'=>array('String', 'uuid'),
        'ordinal'=>array( $this, '__ordinalModifier'),
        //Date modifiers
        'date'=>array( $this, '__dateModifier'),
        //Built-in time() takes no arguments, so it is wrapped to ignore the tag content
        'time'=>array( $this, '__timeModifier'),
    );

    //Replace Variables
    $replacements = array();
    foreach ( $fields as $field ) {
        //A row that lacks a field is treated as an empty value
        $replacements['{$' . $field . '}'] = isset( $data[$field] ) ? trim( $data[$field] ) : '';
    }
    //strtr() substitutes in a single pass, so placeholder-looking text inside
    //one field's value is never expanded again by another field
    $ntitle = strtr( $template['title'], $replacements );
    $nbody = strtr( $template['body'], $replacements );

    //Process modifiers in body
    preg_match_all('/\{(\w+)(.*?)\}(.*?)\{\/\\1\}/si', $nbody, $matches );
    foreach( $matches[1] as $key=>$func ) {
        if ( !array_key_exists( $func, $modifier_map ) ) {
            continue;
        }
        $args = array( $matches[3][$key] );
        if( !empty( $matches[2][$key] ) ) {
            $args[] = $matches[2][$key];
        }
        $imodified = call_user_func_array( $modifier_map[$func], $args );
        //Every identical occurrence of the tag is replaced with this one result
        $nbody = str_replace( $matches[0][$key], $imodified, $nbody );
    }

    //Sanatize Title
    $page = array();
    $page['title'] = trim( $this->__sanatizeTitle( $ntitle ) );
    $page['body'] = $nbody;
    return $page;
}

/**
 * Called by __compileTemplate() for the {time}{/time} modifier.
 * Ignores whatever the tag passes in.
 *
 * @return int Current Unix timestamp
 */
function __timeModifier() {
    return time();
}
/**
 * Builds Mediawiki XML Import file from $pages array
 *
 * Title and body are both XML-escaped. All pages share one UTC timestamp
 * taken when the method is called.
 *
 * @param array $pages List of page arrays with 'title' and 'body' entries
 * @return string The complete <mediawiki> document
 */
function __buildXML( $pages ) {
    $xml = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.4/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.4/ http://www.mediawiki.org/xml/export-0.4.xsd" version="0.4" xml:lang="en">';

    //The trailing Z marks UTC, so the time must be generated in UTC (gmdate), not local time
    $timestamp = gmdate( 'Y-m-d\TH:i:s\Z' );

    foreach ( $pages as $page ) {
        //Escape the title as well: a raw & or < would make the document malformed
        $xml .= "<page>\n<title>" . htmlspecialchars( $page['title'] ) . "</title>\n<revision>";
        $xml .= '<timestamp>' . $timestamp . '</timestamp>';
        $xml .= '<text xml:space="preserve">' . htmlspecialchars( $page['body'] ) . "</text>\n</revision>\n</page>\n";
    }

    $xml .= '</mediawiki>';
    return $xml;
}
/**
 * Remove every character configured under App.ForbiddenTitleCharacters
 * from a page title.
 *
 * @param string $title Title to clean
 * @return string Title with the configured characters removed
 */
function __sanatizeTitle( $title ) {
    $forbidden = Configure::read('App.ForbiddenTitleCharacters');
    return str_replace( $forbidden, '', $title );
}
/**
 * Called by __compileTemplate()
 * Formats the tag content as a date.
 *
 * A 10-digit value is used as a Unix timestamp; anything else is parsed
 * with strtotime(). Content that cannot be parsed is returned unchanged.
 *
 * @param mixed $time Timestamp or date string taken from the template
 * @param string $args Raw modifier arguments, ie format="M/d/Y"; when no
 *                     format is given 'Y-m-d' is used
 * @return string
 */
function __dateModifier( $time, $args = '' ) {
    $format = 'Y-m-d';
    //Only parse arguments when there are some; {date}...{/date} passes none
    if ( trim( (string) $args ) !== '' ) {
        $argKeys = $this->__extractModifierArguments( $args );
        if ( isset( $argKeys['format'] ) && $argKeys['format'] !== '' ) {
            $format = $argKeys['format'];
        }
    }

    $raw = trim( (string) $time );
    if ( preg_match('/^[0-9]{10}$/', $raw) ) {
        $timestamp = (int) $raw;
    } else {
        $timestamp = strtotime( $raw );
        if ( $timestamp === false ) {
            //Leave unparseable content as it is rather than printing the epoch
            return $time;
        }
    }
    return date( $format, $timestamp );
}
/**
 * Called by __compileTemplate()
 * Formats the tag content with number_format().
 *
 * Supported arguments: decimals (default 0), dec_point (default '.') and
 * thousands_sep (default ','). Content that is not numeric is returned
 * unchanged.
 *
 * @param mixed $number Number taken from the template
 * @param string $args Raw modifier arguments, ie decimals=2 dec_point=","
 * @return string
 */
function __numberFormatModifier( $number, $args = '' ) {
    $argKeys = array();
    //Only parse arguments when there are some; {number_format}...{/number_format} passes none
    if ( trim( (string) $args ) !== '' ) {
        $argKeys = $this->__extractModifierArguments( $args );
    }

    $value = trim( (string) $number );
    if ( !is_numeric( $value ) ) {
        //number_format() rejects non-numeric strings, so hand the content back untouched
        return $number;
    }

    $decimals = isset( $argKeys['decimals'] ) ? (int) $argKeys['decimals'] : 0;
    $dec_point = isset( $argKeys['dec_point'] ) ? $argKeys['dec_point'] : '.';
    $thousands_sep = isset( $argKeys['thousands_sep'] ) ? $argKeys['thousands_sep'] : ',';

    return number_format( (float) $value, $decimals, $dec_point, $thousands_sep );
}
/**
 * Called by __compileTemplate()
 * Add the suffix onto numbers, ie 2nd 3rd 15th
 *
 * Content that is not numeric is returned unchanged.
 *
 * @param mixed $cdnl Number taken from the template
 * @return string The input followed by st, nd, rd or th
 */
function __ordinalModifier($cdnl){
    $trimmed = trim( (string) $cdnl );
    if ( !is_numeric( $trimmed ) ) {
        return $cdnl;
    }

    //The suffix depends only on the magnitude, so the sign is dropped
    $magnitude = (int) abs( $trimmed + 0 );
    $lastTwo = $magnitude % 100;
    $lastOne = $magnitude % 10;

    if ( $lastTwo >= 11 && $lastTwo <= 13 ) {
        //11th, 12th and 13th are the exceptions to the last-digit rule
        $ext = 'th';
    } elseif ( $lastOne == 1 ) {
        $ext = 'st';
    } elseif ( $lastOne == 2 ) {
        $ext = 'nd';
    } elseif ( $lastOne == 3 ) {
        $ext = 'rd';
    } else {
        $ext = 'th';
    }
    return $cdnl.$ext;
}
/**
 * Called by Modifier functions to extract the list parameters
 * ie format="M/d/Y"
 * returns array( 'format'=>'M/d/Y' )
 * ie pattern="/foo/i" match="/$1/i"
 * returns array( 'pattern'=>'/foo/i', 'match'=>'/$1/i' )
 *
 * Values in double quotes may contain spaces, ie format="M d, Y".
 * Tokens without a '=' are ignored and an empty string yields an empty array.
 *
 * @param string $args Raw argument string from the modifier tag
 * @return array Argument values keyed by argument name
 */
function __extractModifierArguments( $args ) {
    $argKeys = array();
    //name="quoted value" or name=bare-value; the quoted form is tried first
    preg_match_all( '/([^\s=]+)=(?:"([^"]*)"|(\S*))/', (string) $args, $pairs, PREG_SET_ORDER );
    foreach( $pairs as $pair ) {
        //Group 3 only holds text when the bare form matched
        $value = ( isset( $pair[3] ) && $pair[3] !== '' ) ? $pair[3] : $pair[2];
        $argKeys[$pair[1]] = str_replace('"', '', $value);
    }
    return $argKeys;
}
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment