|
var exec = require('child_process').exec
var execFile = require('child_process').execFile
var fs = require('fs')
var path = require('path')

var BagPipe = require('bagpipe')
var mkdirp = require('mkdirp')
|
// Queue used by beginConversion for the LibreOffice (.doc -> .docx) jobs.
// Per bagpipe's API the constructor argument is the number of pushed jobs
// allowed to run at the same time.
var bag = new BagPipe(5)

// The folder to convert is the first command line argument,
// e.g. '/Users/saibotsivad/Development/thinking/kayser-commentary'
if (!process.argv[2]) {
  console.error('Usage: node <script> <input_folder>')
  process.exit(1)
}

// Resolve to an absolute path without a trailing slash. The conversion
// commands run with their own `cwd`, so a relative path would be resolved
// against the wrong directory, and a trailing slash would place the
// '-markdown' folder inside the input folder.
var input_folder = path.resolve(process.argv[2])

// Sibling folder that receives the generated markdown,
// e.g. '/Users/saibotsivad/Development/thinking/kayser-commentary-markdown'
var output_folder = input_folder + '-markdown'

// Scratch location for the intermediate .docx files.
var temp_folder = '/tmp/markdown_conversion'

// Start walking from the root of the input folder.
processDirectory(input_folder, '.')
|
|
|
/**
 * Read the directory `parentPath/childPath` and pass every entry name to
 * checkIfFile. If the directory cannot be read the error is logged and
 * nothing else happens.
 *
 * @param {string} parentPath - Root folder the walk started from.
 * @param {string} childPath - Directory to list, relative to parentPath.
 */
function processDirectory(parentPath, childPath) {
  var directory = path.join(parentPath, childPath)

  fs.readdir(directory, function(err, files) {
    if (err) {
      console.log(err)
      return
    }

    files.forEach(function(file) {
      checkIfFile(parentPath, childPath, file)
    })
  })
}
|
|
|
/**
 * Stat one directory entry and dispatch it: directories are walked with
 * processDirectory, everything else is handed to beginConversion.
 *
 * @param {string} parentPath - Root folder the walk started from.
 * @param {string} childPath - Directory containing the entry, relative to parentPath.
 * @param {string} file - Name of the entry inside childPath.
 */
function checkIfFile(parentPath, childPath, file) {
  fs.stat(path.join(parentPath, childPath, file), function(err, stats) {
    if (err) {
      // `stats` is undefined when stat fails (entry removed, broken symlink,
      // permission denied); log and skip it instead of throwing.
      console.log('Error reading file stats', path.join(childPath, file), err)
      return
    }

    if (stats.isDirectory()) {
      processDirectory(parentPath, path.join(childPath, file))
    } else {
      beginConversion(parentPath, childPath, file)
    }
  })
}
|
|
|
/**
 * Build a random identifier laid out like a version-4 UUID with the dashes
 * removed: lowercase hex digits with a literal '4' in the version position
 * and one of 8, 9, a, b in the variant position.
 *
 * Uses Math.random, so the value is not cryptographically secure; in this
 * file it only names a scratch directory.
 *
 * @returns {string} The generated identifier.
 */
function uuidv4() {
  return 'xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
    // Random integer 0..15.
    var r = Math.random() * 16 | 0
    // 'x' takes the digit as is; 'y' is forced into the range 8..11.
    var v = c === 'x' ? r : (r & 0x3 | 0x8)
    return v.toString(16)
  })
}
|
|
|
/**
 * Route a file to the right converter based on its extension
 * (case-insensitive): `.doc` files are queued on `bag` for a LibreOffice
 * conversion, `.docx` files go straight to convertDocxToMarkdown, and
 * anything else is ignored.
 *
 * @param {string} parentPath - Root folder the walk started from.
 * @param {string} childPath - Directory containing the file, relative to parentPath.
 * @param {string} file - File name inside childPath.
 */
function beginConversion(parentPath, childPath, file) {
  if (/\.doc$/i.test(file)) {
    bag.push(convertDocToDocxThenMarkdown, parentPath, childPath, file, function(err) {
      // The callback also fires on failure (the failure itself is logged by
      // the converter), so only report success when no error was passed.
      if (!err) {
        console.log('Success converting to DOCX: ' + file)
      }
    })
  } else if (/\.docx$/i.test(file)) {
    convertDocxToMarkdown(parentPath, childPath, file, path.join(parentPath, childPath))
  }
}

/**
 * Strip the last extension from a file name ('a.b.doc' -> 'a.b').
 *
 * @param {string} file - File name to strip.
 * @returns {string|undefined} The name without its extension, or undefined
 *   (after logging) when the name has no "<something>.<ext>" shape.
 */
function regexFilename(file) {
  var match = /(.+)\.[^.]+$/.exec(file)

  if (!match) {
    console.log('Error regexing filename', file)
    return undefined
  }

  return match[1]
}

/**
 * Convert a `.doc` file to `.docx` with LibreOffice into a fresh directory
 * under `temp_folder`, then hand the result to convertDocxToMarkdown.
 *
 * @param {string} parentPath - Root folder the walk started from.
 * @param {string} childPath - Directory containing the file, relative to parentPath.
 * @param {string} file - Name of the `.doc` file.
 * @param {function(Error=)} cb - Called exactly once when LibreOffice exits:
 *   with the error on failure, with no arguments on success. It must fire on
 *   both paths, because bagpipe only frees the job's slot when it is invoked.
 */
function convertDocToDocxThenMarkdown(parentPath, childPath, file, cb) {
  console.log('Starting LibreOffice conversion...', path.join(childPath, file))

  var outputPath = path.join(temp_folder, uuidv4())

  // Arguments are passed as an array (no shell), so quotes, `$` or backticks
  // in file names cannot break or alter the command. The input path is made
  // absolute so it does not depend on the `cwd` given below.
  var args = [
    '--invisible',
    '--convert-to', 'docx',
    path.resolve(parentPath, childPath, file),
    '--outdir', outputPath
  ]

  execFile('/Applications/LibreOffice.app/Contents/MacOS/soffice', args, {
    cwd: input_folder
  }, function(err, stdout, stderr) {
    if (err) {
      console.log('Error converting to DOCX', err, stderr)
      // Still signal completion so the queue can move on to the next job.
      cb(err)
      return
    }

    console.log(stderr, stdout)
    // Signal completion before the pandoc step; that step is not queued.
    cb()
    convertDocxToMarkdown(parentPath, childPath, regexFilename(file) + '.docx', outputPath)
  })
}
|
|
|
/**
 * Convert one `.docx` file to markdown with pandoc.
 *
 * The markdown is written to `output_folder/childPath/<name>.md`. pandoc is
 * run inside `inputFilePath` and told to extract media into `./<name>`
 * there; if that directory exists afterwards it is moved next to the
 * markdown file. All failures are logged; nothing is thrown or returned.
 *
 * @param {string} parentPath - Root folder the walk started from (not used here).
 * @param {string} childPath - Location of the document relative to the root;
 *   mirrored under output_folder.
 * @param {string} filenameWithExtension - Name of the `.docx` file.
 * @param {string} inputFilePath - Directory that actually contains the `.docx`.
 */
function convertDocxToMarkdown(parentPath, childPath, filenameWithExtension, inputFilePath) {
  var filenameWithoutExtension = regexFilename(filenameWithExtension)

  if (!filenameWithoutExtension) {
    // regexFilename has already logged the problem; without a base name the
    // output would be written to "undefined.md".
    return
  }

  var childPathWithFile = path.join(childPath, filenameWithoutExtension)

  // Absolute paths, because pandoc runs with its own `cwd` and would resolve
  // relative ones against that directory.
  var workingDirectory = path.resolve(inputFilePath)
  var outputDirectory = path.resolve(output_folder, childPath)
  var mediaSource = path.join(workingDirectory, filenameWithoutExtension)
  var mediaDestination = path.join(outputDirectory, filenameWithoutExtension)

  mkdirp(outputDirectory, function(mkdirErr) {
    if (mkdirErr) {
      console.log('Error creating output directory', outputDirectory, mkdirErr)
      return
    }

    // Arguments are passed as an array (no shell), so quotes, `$` or
    // backticks in file names cannot break or alter the command.
    var args = [
      filenameWithExtension,
      '--extract-media=./' + filenameWithoutExtension,
      '-f', 'docx',
      '-t', 'markdown',
      '-o', path.join(outputDirectory, filenameWithoutExtension) + '.md'
    ]

    execFile('/usr/local/bin/pandoc', args, {
      cwd: workingDirectory
    }, function(err, stdout, stderr) {
      if (err) {
        console.log('Error converting to markdown', err, stderr)
        return
      }

      console.log('Success converting to markdown: ' + childPathWithFile)

      // The media directory is only present if pandoc extracted something.
      fs.stat(mediaSource, function(statErr) {
        if (statErr) {
          console.log('No image directory found:', childPathWithFile)
          return
        }

        console.log('Moving image directory:', childPathWithFile)

        fs.rename(mediaSource, mediaDestination, function(renameErr) {
          if (renameErr) {
            console.log('Error moving image directory:', childPathWithFile, renameErr)
          } else {
            console.log('Success moving image directory:', childPathWithFile)
          }
        })
      })
    })
  })
}