IcyApril/backupwiki.php

## backupwiki.php
<?php

/*
 * A basic PHP script that exports MediaWiki pages directly from the database to HTML.
 * Perfect for running via a cronjob to create emergency HTML backups of MediaWiki pages.
 * It only requires a database connection and no modification to MediaWiki.
 * Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else.
 * I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies.
 * Backups are stored in a directory named after the day of the month, with one .html file per page. Each directory overwrites the one from last month when a new backup is made.
 *
 * @author: Junade Ali <mjsa@junade.com>
*/

// Connection settings for the MediaWiki database.
$dbHost     = '127.0.0.1';
$dbUser     = 'root';
$dbPassword = '';
$db         = 'wiki';

// Base directory that receives the HTML exports (written without a trailing slash).
$backupsDIR = '/var/www/work/wikiexports';

/**
 * Small holder for the PDO connection to the wiki database.
 *
 * Both the DSN and the connection handle are public properties. When the
 * connection attempt fails the error message is printed and $dbh is left
 * unassigned (null).
 */
class dbFunc {

	/** PDO handle; only assigned when the connection succeeds. */
	public $dbh;

	/** DSN string built from the constructor arguments. */
	public $dsn;

	/**
	 * Builds the MySQL DSN and tries to open the connection.
	 *
	 * @param string $host     Database host.
	 * @param string $user     Database user.
	 * @param string $password Database password.
	 * @param string $db       Database name.
	 */
	public function __construct ($host, $user, $password, $db) {

		$dsnParts = array('mysql:dbname=', $db, ';host=', $host);
		$this->dsn = implode('', $dsnParts);

		try {
			$connection = new PDO($this->dsn, $user, $password);
			$this->dbh = $connection;
		} catch (PDOException $connectionError) {
			// Report the failure but do not abort; callers see a null $dbh.
			echo 'Connection failed:', $connectionError->getMessage();
		}
	}

}

// Connect and read the page list BEFORE touching the backup directory, so a
// database outage cannot wipe an existing backup without replacing it.
$dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);

if (!($dbUtils->dbh instanceof PDO)) {
	// dbFunc has already printed the connection error; nothing can be exported.
	exit(1);
}

$pages = $dbUtils->dbh->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");

// The two lookups are the same for every page, so prepare them once.
$revisionSTMT = $dbUtils->dbh->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
$pageContentSTMT = $dbUtils->dbh->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");

if ($pages === false || $revisionSTMT === false || $pageContentSTMT === false) {
	echo "Unable to query the wiki database\n";
	exit(1);
}

// One directory per day of the month, reused when that day comes round again.
$todayBackupDIR = $backupsDIR."/".date("j")."/";

// Create the directory (and missing parents) only when it is absent; a plain
// mkdir() raises a warning on every run once the directory already exists.
if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0777, true) && !is_dir($todayBackupDIR)) {
	echo "Unable to create backup directory: ".$todayBackupDIR."\n";
	exit(1);
}

// Remove the exports left in this directory by an earlier run.
$staleBackups = glob($todayBackupDIR.'*.html');
if (is_array($staleBackups)) { // glob() can return false on error
	array_map('unlink', $staleBackups);
}

$wiky = new wiky;

foreach ($pages as $page) {

	// Resolve the latest revision of the page to its text row.
	$revisionSTMT->execute(array($page['page_latest']));
	$revisionNum = $revisionSTMT->fetch();
	$revisionSTMT->closeCursor();

	$wikiText = '';
	if ($revisionNum !== false) {
		$pageContentSTMT->execute(array($revisionNum['rev_text_id']));
		$pageContent = $pageContentSTMT->fetch();
		$pageContentSTMT->closeCursor();

		if ($pageContent !== false) {
			$wikiText = $pageContent['old_text'];
		}
	}

	// parse() returns false for empty input; the cast turns that into an empty body.
	$pageHTML = (string) $wiky->parse($wikiText);

	$pageOutput = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
	$pageOutput .= $pageHTML;
	$pageOutput .= "</body></html>";

	// File name: "<title>_<id>", keeping only word characters, hyphens, double
	// quotes, spaces and asterisks. The hyphen is escaped because PCRE2
	// (PHP 7.3+) rejects an unescaped "-" right after \w as an invalid range,
	// which made preg_replace() return null and every page land in ".html".
	$title = $page['page_title'].'_'.$page['page_id'];
	$title = preg_replace('/[^\w\-" *]/', '', $title);

	file_put_contents($todayBackupDIR.$title.".html", $pageOutput);
}

/* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML
 * Author: Toni Lähdekorpi <toni@lygon.net>
 * Modified by Junade Ali for usage in backupwiki.php <mjsa@junade.com>
 *
 * Code usage under any of these licenses:
 * Apache License 2.0, http://www.apache.org/licenses/LICENSE-2.0
 * Mozilla Public License 1.1, http://www.mozilla.org/MPL/1.1/
 * GNU Lesser General Public License 3.0, http://www.gnu.org/licenses/lgpl-3.0.html
 * GNU General Public License 2.0, http://www.gnu.org/licenses/gpl-2.0.html
 * Creative Commons Attribution 3.0 Unported License, http://creativecommons.org/licenses/by/3.0/
 */

/**
 * Regex-driven converter from a subset of wiki markup to HTML.
 *
 * The rules are applied in the order they are listed; later rules rely on the
 * output produced by earlier ones (e.g. list blocks are wrapped before their
 * items are turned into <li> elements).
 */
class wiky {
	private $patterns, $replacements;

	/**
	 * Builds the ordered pattern/replacement tables.
	 *
	 * @param bool $analyze When truthy, the PCRE "S" modifier is appended to every pattern.
	 */
	public function __construct($analyze=false) {
		// Each entry is array(pattern, replacement); order matters.
		$rules = array(
			// Normalise Windows line endings
			array("/\r\n/", "\n"),

			// Headings, deepest level first, each with and without inner spaces
			array("/^==== (.+?) ====$/m", "<h4>$1</h4>"),
			array("/^====(.+?)====$/m", "<h4>$1</h4>"),
			array("/^=== (.+?) ===$/m", "<h3>$1</h3>"),
			array("/^===(.+?)===$/m", "<h3>$1</h3>"),
			array("/^== (.+?) ==$/m", "<h2>$1</h2>"),
			array("/^==(.+?)==$/m", "<h2>$1</h2>"),
			array("/^= (.+?) =$/m", "<h1>$1</h1>"),
			array("/^=(.+?)=$/m", "<h1>$1</h1>"),

			// Inline formatting: bold-italic, bold, italic
			array("/\'\'\'\'\'(.+?)\'\'\'\'\'/s", "<strong><em>$1</em></strong>"),
			array("/\'\'\'(.+?)\'\'\'/s", "<strong>$1</strong>"),
			array("/\'\'(.+?)\'\'/s", "<em>$1</em>"),

			// Horizontal rule
			array("/^----+(\s*)$/m", "<hr/>"),
			// [[file:URL text]] / [[img:URL text]] images
			array("/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i", "<img src=\"$2\" alt=\"$6\"/>"),
			// [URL text] links
			array("/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i", "<a href=\"$1\">$7</a>"),
			// [URL] links
			array("/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i", "<a href=\"$1\">$1</a>"),

			// Indentation: wrap the block (leading newline eases the next pass), then each line
			array("/[\n\r]: *.+([\n\r]:+.+)*/", "\n<dl>$0\n</dl>"),
			array("/^:(?!:) *(.+)$/m", "<dd>$1</dd>"),
			// Sub-indentation: wrap the block, then each line
			array("/([\n\r]:: *.+)+/", "\n<dd><dl>$0\n</dl></dd>"),
			array("/^:: *(.+)$/m", "<dd>$1</dd>"),

			// Ordered lists: whole block, then items owning sub-items at depth 2, 3 and 4
			array("/[\n\r]?#.+([\n|\r]#.+)+/", "\n<ol>\n$0\n</ol>"),
			array("/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/", "\n<li>$1\n<ol>$2\n</ol>\n</li>"),
			array("/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/", "\n<li>$1\n<ol>$2\n</ol>\n</li>"),
			array("/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/", "\n<li>$1\n<ol>$2\n</ol>\n</li>"),

			// Unordered lists: whole block, then items owning sub-items at depth 2, 3 and 4
			array("/[\n\r]?\*.+([\n|\r]\*.+)+/", "\n<ul>\n$0\n</ul>"),
			array("/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/", "\n<li>$1\n<ul>$2\n</ul>\n</li>"),
			array("/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/", "\n<li>$1\n<ul>$2\n</ul>\n</li>"),
			array("/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/", "\n<li>$1\n<ul>$2\n</ul>\n</li>"),

			// Every remaining list marker line becomes an <li>
			array("/^[#\*]+ *(.+)$/m", "<li>$1</li>"),

			// Line breaks: lines containing inline elements, then lines without any markup
			array("/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi", "$0<br/>"),
			array("/^[^><\n\r]+$/m", "$0<br/>"),
		);

		$this->patterns = array();
		$this->replacements = array();
		foreach ($rules as $rule) {
			$this->patterns[] = $analyze ? $rule[0]."S" : $rule[0];
			$this->replacements[] = $rule[1];
		}
	}

	/**
	 * Converts wiki markup to HTML.
	 *
	 * @param string $input Wiki markup.
	 * @return string|false|null The converted text, false when $input is empty(),
	 *                           or whatever preg_replace() yields on failure.
	 */
	public function parse($input) {
		if (empty($input)) {
			return false;
		}

		return preg_replace($this->patterns, $this->replacements, $input);
	}
}
	<?php

	/*
	* A basic PHP script that exports MediaWiki pages directly from the database to HTML.
	* Perfect for running via a cronjob to create emergency HTML backups of MediaWiki pages.
	* It only requires a database connection and no modification to MediaWiki.
	* Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else.
	* I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies.
	* Backups are stored in a directory named after the day of the month, with one .html file per page. Each directory overwrites the one from last month when a new backup is made.
	*
	* @author: Junade Ali <mjsa@junade.com>
	*/

	// Connection settings for the MediaWiki database.
	$dbHost     = '127.0.0.1';
	$dbUser     = 'root';
	$dbPassword = '';
	$db         = 'wiki';

	// Base directory that receives the HTML exports (written without a trailing slash).
	$backupsDIR = '/var/www/work/wikiexports';

	/**
	 * Small holder for the PDO connection to the wiki database.
	 *
	 * Both the DSN and the connection handle are public properties. When the
	 * connection attempt fails the error message is printed and $dbh is left
	 * unassigned (null).
	 */
	class dbFunc {

		/** PDO handle; only assigned when the connection succeeds. */
		public $dbh;

		/** DSN string built from the constructor arguments. */
		public $dsn;

		/**
		 * Builds the MySQL DSN and tries to open the connection.
		 *
		 * @param string $host     Database host.
		 * @param string $user     Database user.
		 * @param string $password Database password.
		 * @param string $db       Database name.
		 */
		public function __construct ($host, $user, $password, $db) {

			$dsnParts = array('mysql:dbname=', $db, ';host=', $host);
			$this->dsn = implode('', $dsnParts);

			try {
				$connection = new PDO($this->dsn, $user, $password);
				$this->dbh = $connection;
			} catch (PDOException $connectionError) {
				// Report the failure but do not abort; callers see a null $dbh.
				echo 'Connection failed:', $connectionError->getMessage();
			}
		}

	}

	// Connect and read the page list BEFORE touching the backup directory, so a
	// database outage cannot wipe an existing backup without replacing it.
	$dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);

	if (!($dbUtils->dbh instanceof PDO)) {
		// dbFunc has already printed the connection error; nothing can be exported.
		exit(1);
	}

	$pages = $dbUtils->dbh->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");

	// The two lookups are the same for every page, so prepare them once.
	$revisionSTMT = $dbUtils->dbh->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
	$pageContentSTMT = $dbUtils->dbh->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");

	if ($pages === false || $revisionSTMT === false || $pageContentSTMT === false) {
		echo "Unable to query the wiki database\n";
		exit(1);
	}

	// One directory per day of the month, reused when that day comes round again.
	$todayBackupDIR = $backupsDIR."/".date("j")."/";

	// Create the directory (and missing parents) only when it is absent; a plain
	// mkdir() raises a warning on every run once the directory already exists.
	if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0777, true) && !is_dir($todayBackupDIR)) {
		echo "Unable to create backup directory: ".$todayBackupDIR."\n";
		exit(1);
	}

	// Remove the exports left in this directory by an earlier run.
	$staleBackups = glob($todayBackupDIR.'*.html');
	if (is_array($staleBackups)) { // glob() can return false on error
		array_map('unlink', $staleBackups);
	}

	$wiky = new wiky;

	foreach ($pages as $page) {

		// Resolve the latest revision of the page to its text row.
		$revisionSTMT->execute(array($page['page_latest']));
		$revisionNum = $revisionSTMT->fetch();
		$revisionSTMT->closeCursor();

		$wikiText = '';
		if ($revisionNum !== false) {
			$pageContentSTMT->execute(array($revisionNum['rev_text_id']));
			$pageContent = $pageContentSTMT->fetch();
			$pageContentSTMT->closeCursor();

			if ($pageContent !== false) {
				$wikiText = $pageContent['old_text'];
			}
		}

		// parse() may return false or null; the cast turns either into an empty body.
		$pageHTML = (string) $wiky->parse($wikiText);

		$pageOutput = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
		$pageOutput .= $pageHTML;
		$pageOutput .= "</body></html>";

		// File name: "<title>_<id>", keeping only word characters, hyphens, double
		// quotes, spaces and asterisks. The hyphen is escaped because PCRE2
		// (PHP 7.3+) rejects an unescaped "-" right after \w as an invalid range,
		// which made preg_replace() return null and every page land in ".html".
		$title = $page['page_title'].'_'.$page['page_id'];
		$title = preg_replace('/[^\w\-" *]/', '', $title);

		file_put_contents($todayBackupDIR.$title.".html", $pageOutput);
	}

	/* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML
	* Author: Toni Lähdekorpi <toni@lygon.net>
	* Modified by Junade Ali for usage in backupwiki.php <mjsa@junade.com>
	*
	* Code usage under any of these licenses:
	* Apache License 2.0, http://www.apache.org/licenses/LICENSE-2.0
	* Mozilla Public License 1.1, http://www.mozilla.org/MPL/1.1/
	* GNU Lesser General Public License 3.0, http://www.gnu.org/licenses/lgpl-3.0.html
	* GNU General Public License 2.0, http://www.gnu.org/licenses/gpl-2.0.html
	* Creative Commons Attribution 3.0 Unported License, http://creativecommons.org/licenses/by/3.0/
	*/

	/**
	 * Regex-driven converter from a subset of wiki markup to HTML.
	 *
	 * $patterns and $replacements are parallel arrays handed to preg_replace(),
	 * which applies them in order; later rules rely on the output of earlier ones.
	 *
	 * The patterns below are restored to their intended form: "|" alternations are
	 * no longer backslash-escaped and the "*" characters that had been stripped
	 * from the indentation, unordered-list and list-item rules are back. One of
	 * the damaged patterns had an unterminated character class, which made
	 * preg_replace() fail for the whole rule set.
	 */
	class wiky {
		private $patterns, $replacements;

		/**
		 * Builds the ordered pattern/replacement tables.
		 *
		 * @param bool $analyze When truthy, the PCRE "S" modifier is appended to every pattern.
		 */
		public function __construct($analyze=false) {
			$this->patterns=array(
				"/\r\n/",

				// Headings
				"/^==== (.+?) ====$/m", // Subsubheading
				"/^====(.+?)====$/m", // Subsubheading without spaces
				"/^=== (.+?) ===$/m", // Subheading
				"/^===(.+?)===$/m", // Subheading without spaces
				"/^== (.+?) ==$/m", // Subheading
				"/^==(.+?)==$/m", // Subheading without spaces
				"/^= (.+?) =$/m", // Heading
				"/^=(.+?)=$/m", // Heading without space

				// Formatting
				"/\'\'\'\'\'(.+?)\'\'\'\'\'/s", // Bold-italic
				"/\'\'\'(.+?)\'\'\'/s", // Bold
				"/\'\'(.+?)\'\'/s", // Italic

				// Special
				"/^----+(\s*)$/m", // Horizontal line
				"/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i", // (File|img):(http|https|ftp) aka image
				"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i", // Other urls with text
				"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i", // Other urls without text

				// Indentations
				"/[\n\r]: *.+([\n\r]:+.+)*/", // Indentation first pass
				"/^:(?!:) *(.+)$/m", // Indentation second pass
				"/([\n\r]:: *.+)+/", // Subindentation first pass
				"/^:: *(.+)$/m", // Subindentation second pass

				// Ordered list
				"/[\n\r]?#.+([\n|\r]#.+)+/", // First pass, finding all blocks
				"/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/", // List item with sub items of 2 or more
				"/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/", // List item with sub items of 3 or more
				"/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/", // List item with sub items of 4 or more

				// Unordered list
				"/[\n\r]?\*.+([\n|\r]\*.+)+/", // First pass, finding all blocks
				"/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/", // List item with sub items of 2 or more
				"/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/", // List item with sub items of 3 or more
				"/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/", // List item with sub items of 4 or more

				// List items
				"/^[#\*]+ *(.+)$/m", // Wraps all list items to <li/>

				// Newlines (TODO: make it smarter so that it groups paragraphs)
				"/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi", // Ones with breakable elements (TODO: the li|dd comparison is crude)
				"/^[^><\n\r]+$/m", // Ones with no elements
			);
			$this->replacements=array(
				"\n",

				// Headings
				"<h4>$1</h4>",
				"<h4>$1</h4>",
				"<h3>$1</h3>",
				"<h3>$1</h3>",
				"<h2>$1</h2>",
				"<h2>$1</h2>",
				"<h1>$1</h1>",
				"<h1>$1</h1>",

				// Formatting
				"<strong><em>$1</em></strong>",
				"<strong>$1</strong>",
				"<em>$1</em>",

				// Special
				"<hr/>",
				"<img src=\"$2\" alt=\"$6\"/>",
				"<a href=\"$1\">$7</a>",
				"<a href=\"$1\">$1</a>",

				// Indentations
				"\n<dl>$0\n</dl>", // Newline is here to make the second pass easier
				"<dd>$1</dd>",
				"\n<dd><dl>$0\n</dl></dd>",
				"<dd>$1</dd>",

				// Ordered list
				"\n<ol>\n$0\n</ol>",
				"\n<li>$1\n<ol>$2\n</ol>\n</li>",
				"\n<li>$1\n<ol>$2\n</ol>\n</li>",
				"\n<li>$1\n<ol>$2\n</ol>\n</li>",

				// Unordered list
				"\n<ul>\n$0\n</ul>",
				"\n<li>$1\n<ul>$2\n</ul>\n</li>",
				"\n<li>$1\n<ul>$2\n</ul>\n</li>",
				"\n<li>$1\n<ul>$2\n</ul>\n</li>",

				// List items
				"<li>$1</li>",

				// Newlines
				"$0<br/>",
				"$0<br/>",
			);
			if($analyze) {
				foreach($this->patterns as $k=>$v) {
					$this->patterns[$k].="S";
				}
			}
		}

		/**
		 * Converts wiki markup to HTML.
		 *
		 * @param string $input Wiki markup.
		 * @return string|false|null The converted text, false when $input is empty(),
		 *                           or whatever preg_replace() yields on failure.
		 */
		public function parse($input) {
			if(!empty($input))
				$output=preg_replace($this->patterns,$this->replacements,$input);
			else
				$output=false;
			return $output;
		}
	}