Last active
August 9, 2016 07:18
-
-
Save timepp/c5f75a99c619ab111dfae1c4f5a8e6e4 to your computer and use it in GitHub Desktop.
get word document headings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function GetWordOutline(docfilename) {
    /** Get word document outlines
     * @tag dev
     *
     * Automates Word through COM, walks every paragraph of the document and
     * collects those whose style name matches "Heading <digit>" (optionally
     * followed by a comma and further style names).
     *
     * @param {filename} docfilename - .doc(x) file
     * @returns {Array} one { level, text } object per heading paragraph, in
     *          document order; `level` is the heading digit as a number and
     *          `text` is the paragraph's Range.Text exactly as Word reports it.
     * @throws rethrows any error raised while opening or scanning the
     *         document, after the document and Word have been shut down.
     */
    var hdrregex = /^Heading ([0-9])(,|$)/;
    var outline = [];
    var word = new ActiveXObject("Word.Application");
    try {
        // Arguments after the file name: ConfirmConversions = false, ReadOnly = true.
        var doc = word.Documents.Open(docfilename, false, true);
        try {
            var count = doc.Paragraphs.Count;
            var p = 0;
            var t = new Date();
            for (var it = new Enumerator(doc.Paragraphs); !it.atEnd(); it.moveNext()) {
                var para = it.item();
                // exec() coerces the style to a string before matching.
                var result = hdrregex.exec(para.Style);
                if (result != null) {
                    var level = parseInt(result[1], 10);
                    outline.push({ level: level, text: para.Range.Text });
                }
                p++;
                // Report progress only when more than 10 seconds have passed
                // since the previous report, so large documents show signs of life.
                var nt = new Date();
                if (nt.getTime() - t.getTime() > 1000 * 10) {
                    t = nt;
                    var np = Math.floor(p * 100 / count);
                    WScript.Echo(np.toString() + "% (" + p + ") paragraphs proceeded.");
                }
            }
        } finally {
            // Close the document even if scanning failed part-way through.
            doc.Close();
        }
    } finally {
        // Always quit Word; otherwise a failure leaves the automated Word
        // instance running with nobody left to shut it down.
        word.Quit();
    }
    return outline;
}
// Script entry point: print the headings of the Word document named by the
// first command-line argument, indented according to heading level.
if (WScript.Arguments.length < 1) {
    // Without this check a missing argument surfaces as an opaque runtime error.
    WScript.Echo("usage: cscript " + WScript.ScriptName + " WORD_DOCUMENT");
    WScript.Quit(1);
}
var filename = WScript.Arguments(0);
var headings = GetWordOutline(filename);
// Indexed loop rather than for...in, which would also visit any enumerable
// properties added to Array.prototype.
for (var i = 0; i < headings.length; i++) {
    var heading = headings[i];
    // Joining N empty slots yields N - 1 separators, so this produces
    // 2 * (level - 1) spaces: level 1 is flush left, each deeper level adds two.
    var prefix = Array(2 * heading.level - 1).join(" ");
    WScript.Echo(prefix + heading.text);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
usage:
cscript dump_doc_headings.js WORD_DOCUMENT
screenshot: