Skip to content

Instantly share code, notes, and snippets.

@saibotsivad
Created July 23, 2014 01:51
Show Gist options
  • Save saibotsivad/1a73910e7f3862321084 to your computer and use it in GitHub Desktop.
Save saibotsivad/1a73910e7f3862321084 to your computer and use it in GitHub Desktop.
Convert MediaWiki pages to Markdown for Noddity
'use strict'
var fs = require('fs')
var request = require('request')
var htmlparser = require("htmlparser2")
var toMarkdown = require('to-markdown').toMarkdown
var mkdirp = require('mkdirp')
if (process.argv[3] === undefined) {
console.log("Usage: node wikimedia-markdown-export.js [text file of all pages to export] [site domain]")
// node index.js titles.txt fs.error420.com
// put the page titles one per line in titles.txt
return
}
/**
 * Build the MediaWiki `action=parse` endpoint prefix for a site. The
 * URI-encoded page title is appended to the returned string by the caller.
 *
 * @param {string} site - Site domain (optionally with a path), e.g.
 *   `fs.error420.com`. An explicit `http://` or `https://` prefix is kept
 *   as-is; without one, `http://` is assumed.
 * @returns {string} URL ending in `&page=`.
 */
function buildApiUrl(site) {
  var hasScheme = /^https?:\/\//i.test(site)
  return (hasScheme ? '' : 'http://') + site + '/api.php?format=json&action=parse&page='
}

// Run-time configuration taken from the command line.
var settings = {
  // Path of the text file listing the page titles to export.
  fileName: process.argv[2],
  // API endpoint prefix for the wiki named by the second argument.
  url: buildApiUrl(process.argv[3]),
  // Encoding used when reading the titles file.
  fileEncoding: 'utf8'
}
// Directory that receives one `<title>.md` file per exported page.
var outputDir = __dirname + '/output/'

/**
 * Turn the raw contents of the titles file into a list of page titles.
 * Accepts LF or CRLF line endings, trims surrounding whitespace and drops
 * blank lines (for example the one produced by a trailing newline).
 *
 * @param {string} data - Full text of the titles file, one title per line.
 * @returns {string[]} Non-empty titles in file order.
 */
function parseTitles(data) {
  return data.split(/\r?\n/).map(function (line) {
    return line.trim()
  }).filter(function (line) {
    return line.length > 0
  })
}

/**
 * Extract the rendered HTML from the JSON body of an `action=parse` response.
 *
 * @param {string} body - Raw response body.
 * @returns {string} The value found at `parse.text['*']`.
 * @throws {Error} If the body is not valid JSON, carries an `error` member,
 *   or has no string at `parse.text['*']`.
 */
function extractPageHtml(body) {
  var payload = JSON.parse(body)
  if (payload && payload.error) {
    throw new Error(payload.error.info || payload.error.code || 'unknown API error')
  }
  if (!payload || !payload.parse || !payload.parse.text || typeof payload.parse.text['*'] !== 'string') {
    throw new Error('response contains no parsed page text')
  }
  return payload.parse.text['*']
}

/**
 * Fetch one page, convert its HTML to markdown and write it to
 * `output/<title>.md`. Every failure is logged and affects only this page,
 * so the remaining titles are still exported.
 *
 * NOTE: the title is used verbatim in the file name, so a title containing
 * `/` points into a sub-directory that is not created here; that write fails
 * and is logged.
 *
 * @param {string} title - Page title exactly as listed in the titles file.
 */
function exportPage(title) {
  var pageUrl = settings.url + encodeURIComponent(title)
  request(pageUrl, function (error, response, body) {
    if (error) {
      console.error('Some error on: ' + pageUrl)
      return
    }

    var markdown
    try {
      // The "title:" header line precedes the converted page body.
      markdown = 'title: ' + title + '\n\n' + toMarkdown(extractPageHtml(body))
    } catch (conversionError) {
      console.error('Could not convert "' + title + '": ' + conversionError.message)
      return
    }

    // A callback is passed so that write failures are reported instead of
    // being lost (current Node versions also reject a missing callback).
    fs.writeFile(outputDir + title + '.md', markdown, 'utf8', function (writeError) {
      if (writeError) {
        console.error('Error writing "' + title + '": ' + writeError.message)
      }
    })
    // TODO: images referenced by the page are not downloaded; an
    // htmlparser2-based pass over the HTML was sketched here but never enabled.
  })
}

// Create the output directory first and only then start exporting, so that no
// page can be written before the directory exists.
mkdirp(outputDir, function (err) {
  if (err) {
    console.error(err)
    return
  }
  fs.readFile(settings.fileName, settings.fileEncoding, function (readError, data) {
    if (readError) {
      console.error('Error reading file')
      return
    }
    parseTitles(data).forEach(function (title) {
      exportPage(title)
    })
  })
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment