Skip to content

Instantly share code, notes, and snippets.

@danielearwicker
Created February 19, 2016 13:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielearwicker/0da828e0e8f7680b0d4a to your computer and use it in GitHub Desktop.
Save danielearwicker/0da828e0e8f7680b0d4a to your computer and use it in GitHub Desktop.
Simple minimal backup of MediaWiki
using System;
using System.Collections.Specialized;
using System.IO;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
namespace WikiBackup
{
    /// <summary>
    /// Minimal MediaWiki backup tool. Exports the pages listed on Special:AllPages
    /// through Special:Export into a timestamped XML file (keeping only the most
    /// recent backups), and downloads the original images referenced by the
    /// thumbnails on Special:NewFiles.
    /// </summary>
    class Program
    {
        private const string BaseUrl = "http://wiki/mediawiki/";
        private const string IndexUrl = BaseUrl + "index.php/";
        private const string ExportUrl = BaseUrl + "index.php?title=Special:Export&action=submit";
        private const string BackupPath = @"\\fileserver\home$\daniele\WikiBackups";

        /// <summary>Number of text backups retained in the "Text" folder.</summary>
        private const int BackupsToKeep = 10;

        // Matches a page link and captures the (HTML-encoded) value of its title attribute.
        private static readonly Regex LinkPattern =
            new Regex(@"\<a href=""[^""]+"" title=""([^""]+)""\>");

        // Matches a thumbnail <img> and captures the path that follows "images/thumb/".
        private static readonly Regex ThumbnailPattern =
            new Regex(@"\<img alt=""\(thumbnail\)"" src=""/mediawiki/images/thumb/([^""]+)""");

        /// <summary>
        /// Extracts the page titles from the HTML of Special:AllPages.
        /// </summary>
        /// <param name="allPagesHtml">Raw HTML returned by Special:AllPages.</param>
        /// <returns>
        /// The decoded titles of the links found between the start of the
        /// "mw-allpages-table-chunk" table and the next closing table tag, in document order.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="allPagesHtml"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The expected table markers are missing.</exception>
        internal static string[] ExtractPageTitles(string allPagesHtml)
        {
            if (allPagesHtml == null)
            {
                throw new ArgumentNullException("allPagesHtml");
            }

            // NOTE(review): only the table chunk present in this single response is read;
            // if the wiki paginates Special:AllPages, later pages are not visited - confirm.
            const string Prefix = "<table class=\"mw-allpages-table-chunk\"><tr><td style=\"width:33%\">";
            const string Suffix = "</table>";

            // Ordinal search: these are markup tokens, not linguistic text.
            var prefixPos = allPagesHtml.IndexOf(Prefix, StringComparison.Ordinal);
            if (prefixPos == -1)
            {
                throw new InvalidOperationException("Unexpected data from Special:AllPages");
            }

            var start = prefixPos + Prefix.Length;
            var suffixPos = allPagesHtml.IndexOf(Suffix, start, StringComparison.Ordinal);
            if (suffixPos == -1)
            {
                throw new InvalidOperationException("Unexpected data from Special:AllPages");
            }

            var tableHtml = allPagesHtml.Substring(start, suffixPos - start);

            // Attribute values are HTML-encoded ("R&amp;D"); decode them so the export
            // request carries the real page title.
            return LinkPattern.Matches(tableHtml)
                .OfType<Match>()
                .Select(m => WebUtility.HtmlDecode(m.Groups[1].Value))
                .ToArray();
        }

        /// <summary>
        /// Extracts the server-relative paths of the original images from the HTML of Special:NewFiles.
        /// </summary>
        /// <param name="newFilesHtml">Raw HTML returned by Special:NewFiles.</param>
        /// <returns>
        /// Distinct paths made of the first three segments that follow "images/thumb/"
        /// in each thumbnail URL (for example "a/ab/Logo.png"). Thumbnails whose path has
        /// fewer than three non-empty leading segments are skipped.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="newFilesHtml"/> is null.</exception>
        internal static string[] ExtractImagePaths(string newFilesHtml)
        {
            if (newFilesHtml == null)
            {
                throw new ArgumentNullException("newFilesHtml");
            }

            return ThumbnailPattern.Matches(newFilesHtml)
                .OfType<Match>()
                .Select(m => WebUtility.HtmlDecode(m.Groups[1].Value).Split('/'))
                // A shorter path cannot name a file; the last segment is used as the local file name.
                .Where(parts => parts.Length >= 3 && parts.Take(3).All(p => p.Length > 0))
                .Select(parts => string.Join("/", parts.Take(3)))
                .Distinct(StringComparer.Ordinal)
                .ToArray();
        }

        /// <summary>
        /// Builds the file name of a text backup taken at the given time.
        /// </summary>
        /// <param name="timestamp">Time at which the backup is taken.</param>
        /// <returns>A name of the form "yyyy-MM-dd-HH-mm-ss.xml".</returns>
        internal static string BuildBackupFileName(DateTime timestamp)
        {
            // "HH" (24-hour clock) keeps names unique across AM/PM and makes an ordinal
            // sort of the names chronological, which the pruning step depends on.
            return timestamp.ToString(
                "yyyy-MM-dd-HH-mm-ss",
                System.Globalization.CultureInfo.InvariantCulture) + ".xml";
        }

        /// <summary>
        /// Chooses which backup files should be deleted so that only the newest ones remain.
        /// </summary>
        /// <param name="backupFiles">Paths of the existing backup files.</param>
        /// <param name="keep">Number of files to retain.</param>
        /// <returns>
        /// The paths that sort first (ordinal order) once the last <paramref name="keep"/>
        /// entries are excluded; empty when there are no more than <paramref name="keep"/> files.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="backupFiles"/> is null.</exception>
        internal static string[] SelectExpiredBackups(string[] backupFiles, int keep)
        {
            if (backupFiles == null)
            {
                throw new ArgumentNullException("backupFiles");
            }

            var sorted = backupFiles.OrderBy(f => f, StringComparer.Ordinal).ToArray();
            return sorted.Take(Math.Max(0, sorted.Length - keep)).ToArray();
        }

        /// <summary>
        /// Exports the wiki pages to a new XML file under the "Text" backup folder and
        /// then deletes the oldest files so that at most <see cref="BackupsToKeep"/> remain.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Special:AllPages did not contain the expected markup or listed no pages.
        /// </exception>
        static void BackupText()
        {
            var backupTextPath = Path.Combine(BackupPath, "Text");

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(backupTextPath);

            string allPagesRaw;
            using (var client = new WebClient())
            {
                allPagesRaw = client.DownloadString(IndexUrl + "Special:AllPages");
            }

            var allPages = ExtractPageTitles(allPagesRaw);
            if (allPages.Length == 0)
            {
                // Writing an empty export would eventually rotate real backups out of the folder.
                throw new InvalidOperationException("Unexpected data from Special:AllPages");
            }

            var values = new NameValueCollection { { "pages", string.Join("\n", allPages) } };
            using (var client = new WebClient())
            {
                client.Headers.Add("Content-Type", "application/x-www-form-urlencoded");
                var result = client.UploadValues(ExportUrl, "POST", values);
                var backupName = Path.Combine(backupTextPath, BuildBackupFileName(DateTime.Now));
                File.WriteAllBytes(backupName, result);
            }

            foreach (var old in SelectExpiredBackups(Directory.GetFiles(backupTextPath), BackupsToKeep))
            {
                try
                {
                    File.Delete(old);
                }
                catch (IOException ex)
                {
                    // Pruning is best-effort: report the failure and keep going.
                    Console.Error.WriteLine("Could not delete old backup '" + old + "': " + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    Console.Error.WriteLine("Could not delete old backup '" + old + "': " + ex.Message);
                }
            }
        }

        /// <summary>
        /// Downloads the original image behind every thumbnail listed on Special:NewFiles
        /// into the "Images" backup folder, overwriting files that have the same name.
        /// </summary>
        static void BackupImages()
        {
            var backupImagesPath = Path.Combine(BackupPath, "Images");
            Directory.CreateDirectory(backupImagesPath);

            using (var client = new WebClient())
            {
                var newFilesRaw = client.DownloadString(IndexUrl + "Special:NewFiles");
                foreach (var relativePath in ExtractImagePaths(newFilesRaw))
                {
                    // The last path segment is the image's file name on the server.
                    var localName = relativePath.Substring(relativePath.LastIndexOf('/') + 1);
                    var fileData = client.DownloadData(BaseUrl + "images/" + relativePath);
                    File.WriteAllBytes(Path.Combine(backupImagesPath, localName), fileData);
                }
            }
        }

        /// <summary>
        /// Program entry point: backs up the wiki text, then the images.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            BackupText();
            BackupImages();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment