Skip to content

Instantly share code, notes, and snippets.

@timepp
Last active August 9, 2016 07:18
Show Gist options
  • Save timepp/c5f75a99c619ab111dfae1c4f5a8e6e4 to your computer and use it in GitHub Desktop.
Save timepp/c5f75a99c619ab111dfae1c4f5a8e6e4 to your computer and use it in GitHub Desktop.
get word document headings
/**
 * Get word document outlines.
 *
 * Starts a Word automation instance, opens the document read-only, walks
 * every paragraph and collects those whose style name looks like
 * "Heading N" (N being a single digit). While scanning, a progress line is
 * echoed roughly every ten seconds. The document and the Word instance are
 * always released, even when opening or scanning fails.
 *
 * @tag dev
 *
 * @param {filename} docfilename - .doc(x) file
 * @returns {Array} one { level: Number, text: String } entry per heading,
 *     in document order; `text` is the paragraph's Range.Text unmodified.
 */
function GetWordOutline(docfilename) {
    // Style name must be exactly "Heading N", or "Heading N" followed by a
    // comma; anything else is not treated as a heading.
    var hdrregex = /^Heading ([0-9])(,|$)/;
    var progressIntervalMs = 1000 * 10;
    var outline = [];
    var word = new ActiveXObject("Word.Application");
    var doc = null;
    try {
        // Arguments: FileName, ConfirmConversions = false, ReadOnly = true.
        doc = word.Documents.Open(docfilename, false, true);
        var count = doc.Paragraphs.Count;
        var p = 0;
        var t = new Date();
        for (var it = new Enumerator(doc.Paragraphs); !it.atEnd(); it.moveNext()) {
            var para = it.item();
            var result = hdrregex.exec(para.Style);
            if (result !== null) {
                outline.push({ level: parseInt(result[1], 10), text: para.Range.Text });
            }
            p++;
            // Large documents take a while to enumerate; report progress
            // only when enough time has passed since the last report.
            var nt = new Date();
            if (nt.getTime() - t.getTime() > progressIntervalMs) {
                t = nt;
                var np = Math.floor(p * 100 / count);
                WScript.Echo(np.toString() + "% (" + p + ") paragraphs proceeded.");
            }
        }
    } finally {
        // Without this cleanup a failure would leave an invisible Word
        // process running in the background.
        try {
            if (doc !== null) {
                // 0 = do not save changes, so no save prompt can block the
                // hidden Word window.
                doc.Close(0);
            }
        } finally {
            word.Quit();
        }
    }
    return outline;
}
// Script entry point: print the headings of the Word document named on the
// command line, indented two spaces per heading level below the first.
if (WScript.Arguments.length < 1) {
    WScript.Echo("usage: cscript " + WScript.ScriptName + " WORD_DOCUMENT");
    WScript.Quit(1);
}
// Hand Word an absolute path: Word is a separate process and may not resolve
// a relative path against the directory this script was started from.
var fso = new ActiveXObject("Scripting.FileSystemObject");
var filename = fso.GetAbsolutePathName(WScript.Arguments(0));
var headings = GetWordOutline(filename);
// Indexed loop rather than for...in, so only array elements are visited.
for (var i = 0; i < headings.length; i++) {
    var heading = headings[i];
    // Joining N empty slots yields N - 1 spaces: 0 for level 1, 2 for level 2, ...
    var prefix = Array(2 * heading.level - 1).join(" ");
    WScript.Echo(prefix + heading.text);
}
@timepp
Copy link
Author

timepp commented Aug 9, 2016

usage:
cscript dump_doc_headings.js WORD_DOCUMENT

screenshot:
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment