Skip to content

Instantly share code, notes, and snippets.

@yu-tang
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yu-tang/f1b3e5f3975370f609d0 to your computer and use it in GitHub Desktop.
Save yu-tang/f1b3e5f3975370f609d0 to your computer and use it in GitHub Desktop.
Merge two .docx files paragraph-by-paragraph.
'*********************************************************************
' MergeDocs.vbs
' Merge two .docx files paragraph-by-paragraph.
'---------------------------------------------------------------------
' Author : Yu Tang
' Version : 0.1.1
' Lisence : GPLv3
' Required : Microsoft Word installed.
' Usage : 1. D&D source file from OmegaT project source dir.
' or
' 2. D&D target file from OmegaT project target dir.
' or
' 3. Run this command;
' CScript //nologo MergeDocs.vbs "path\to\source\doc" "path\to\target\doc"
' ChangeLog :
' 2014-9-5 v0.1.0 released
'*********************************************************************
Option Explicit
'*********************************************************************
' Main routine
'*********************************************************************
Call Main()
Public Sub Main()
Dim merger ' As DocxMerger
Dim settings ' As DocxMergerSettings
Dim ret ' As Boolean
Dim s ' As String
' get settings
Set settings = CreateDocxMergerSettings()
ret = settings.GetReady()
If ret = False Then
WScript.Echo settings.ErrorDescription
Exit Sub
ElseIf settings.FilesPathsAutoCompleted Then
s = "[source] " & settings.SourceFilePath & vbCr
s = s & "[target] " & settings.TargetFilePath & vbCr
s = s & vbCr & "Are you sure you want to merge with these settings?"
If vbNo = MsgBox(s, vbYesNo + vbQuestion, "Merge docx") Then
WScript.Echo "Operation canceled by user."
Exit Sub
End If
End If
' get merger
Set merger = CreateDocxMerger(settings)
ret = merger.Validate()
If ret = False Then
WScript.Echo "Warning: These documents have different paragraph count. Merging will be incomplete."
End If
' merge
ret = merger.Merge()
If ret = False Then
WScript.Echo "Failed (" & merger.ErrorNumber & ") " & merger.ErrorDescription
End If
End Sub
Public Function CreateDocxMergerSettings() ' As DocxMergerSettings
Dim settings ' As DocxMergerSettings
Dim argsUnnamed
Set settings = New DocxMergerSettings
Set argsUnnamed = WScript.Arguments.Unnamed
Select Case argsUnnamed.Count
Case 0
' No args found
Case 1
settings.SourceFilePath = argsUnnamed.Item(0)
Case Else
settings.SourceFilePath = argsUnnamed.Item(0)
settings.TargetFilePath = argsUnnamed.Item(1)
End Select
Set CreateDocxMergerSettings = settings
End Function
Public Function CreateDocxMerger(settings) ' As DocxMerger
Dim marger ' As DocxMerger
Set marger = New DocxMerger
Call marger.Init(settings)
Set CreateDocxMerger = marger
End Function
'*********************************************************************
' DocxMergerSettings class
'*********************************************************************
Class DocxMergerSettings
Private fso ' As Scripting.FileSystemObject
Private sourceDocxPath ' As String
Private targetDocxPath ' As String
Private errNumber ' As Long
Private errDescription ' As String
Private pathsAutoCompleted ' As Boolean
Private Sub Class_Initialize()
Set fso = CreateObject("Scripting.FileSystemObject")
sourceDocxPath = ""
targetDocxPath = ""
errNumber = 0
errDescription = ""
pathsAutoCompleted = False
End Sub
Private Sub Class_Terminate()
'
End Sub
Public Property Get SourceFilePath() ' As String
SourceFilePath = sourceDocxPath
End Property
Public Property Let SourceFilePath(ByVal vNewValue)
sourceDocxPath = vNewValue
End Property
Public Property Get TargetFilePath() ' As String
TargetFilePath = targetDocxPath
End Property
Public Property Let TargetFilePath(ByVal vNewValue)
targetDocxPath = vNewValue
End Property
Public Property Get ErrorNumber() ' As Long
ErrorNumber = errNumber
End Property
Public Property Get ErrorDescription() ' As String
ErrorDescription = errDescription
End Property
Public Property Get FilesPathsAutoCompleted() ' As Boolean
FilesPathsAutoCompleted = pathsAutoCompleted
End Property
Public Function GetReady() ' As Boolean
errNumber = 0
errDescription = ""
pathsAutoCompleted = False
GetReady = False
' Both paths are missing
If sourceDocxPath = "" And targetDocxPath = "" Then
errNumber = 5
errDescription = "Both source file path and target file path are missing."
Exit Function
End If
' Source file is missing
If sourceDocxPath <> "" Then
If Not fso.FileExists(sourceDocxPath) Then
errNumber = 5
errDescription = "Source file '" & sourceDocxPath & "' is missing."
Exit Function
End If
End If
' Target file is missing
If targetDocxPath <> "" Then
If Not fso.FileExists(targetDocxPath) Then
errNumber = 5
errDescription = "Target file '" & targetDocxPath & "' is missing."
Exit Function
End If
End If
Dim ret: ret = False
If sourceDocxPath <> "" And targetDocxPath = "" Then
pathsAutoCompleted = AutoCompleteDocxPaths(sourceDocxPath)
ret = pathsAutoCompleted
ElseIf sourceDocxPath = "" And targetDocxPath <> "" Then
pathsAutoCompleted = AutoCompleteDocxPaths(targetDocxPath)
ret = pathsAutoCompleted
Else
ret = True
End If
If ret = False Then
errNumber = 5
errDescription = "Source file or Target file is missing."
End If
GetReady = ret
End Function
Private Function AutoCompleteDocxPaths(filePath) ' As Boolean
Dim f ' As Scripting.File
Dim s ' As String
Dim search ' As String
Dim pos ' As Long
AutoCompleteDocxPaths = False
Set f = fso.GetFile(filePath)
s = f.ParentFolder.Path & "\" ' ex) "D:\omegat\myProject\source\"
search = "\source\"
pos = InStr(s & "\", search)
If pos > 0 Then
s = Replace(filePath, search, "\target\")
If fso.FileExists(s) Then
sourceDocxPath = filePath
targetDocxPath = s
AutoCompleteDocxPaths = True
Exit Function
End If
End If
search = "\target\"
pos = InStr(s & "\", search)
If pos > 0 Then
s = Replace(filePath, search, "\source\")
If fso.FileExists(s) Then
sourceDocxPath = s
targetDocxPath = filePath
AutoCompleteDocxPaths = True
Exit Function
End If
End If
End Function
End Class
'*********************************************************************
' DocxMerger class
'*********************************************************************
Class DocxMerger
Private settings ' As DocxMergerSettings
Private app ' As Word.Application
Private docSource ' As Word.Document
Private docTarget ' As Word.Document
Private mergeCompleted ' As Boolean
Private errNumber ' As Long
Private errDescription ' As String
Private Sub Class_Initialize()
Set settings = Nothing
Set app = CreateObject("Word.Application")
mergeCompleted = False
errNumber = 0
errDescription = ""
End Sub
Private Sub Class_Terminate()
Const DO_NOT_SAVE_CHANGES = False
Const wdDialogFileSaveAs = 84
If app Is Nothing Then
Exit Sub
ElseIf mergeCompleted Then
docSource.Close DO_NOT_SAVE_CHANGES
app.Visible = True
WScript.CreateObject("WScript.Shell").AppActivate app.ActiveWindow.Caption
app.Dialogs.Item(wdDialogFileSaveAs).Show
Else
app.Quit DO_NOT_SAVE_CHANGES
Set app = Nothing
End If
End Sub
' callable one-time only
Public Sub Init(objSettings)
Const READ_ONLY = True
If Not settings Is Nothing Then
Err.Raise vbObjectError + 1, TypeName(Me), "This object is already initialized."
End If
Set settings = objSettings
Set docSource = app.Documents.Open(settings.SourceFilePath)
Set docTarget = app.Documents.Open(settings.TargetFilePath)
End Sub
Public Function Validate() ' As Boolean
Dim sourceParaCount ' As Long
Dim targetParaCount ' As Long
sourceParaCount = docSource.Paragraphs.Count
targetParaCount = docTarget.Paragraphs.Count
Validate = (sourceParaCount = targetParaCount)
End Function
' return True if succeed, otherwise return False.
Public Function Merge() ' As Boolean
On Error Resume Next
errNumber = 0
errDescription = ""
Call MergeInternal
If Err.Number = 0 Then
Merge = True
Else
errNumber = Err.Number
errDescription = Err.Description
Err.Clear
Merge = False
End If
End Function
Public Property Get ErrorNumber() ' As Long
ErrorNumber = errNumber
End Property
Public Property Get ErrorDescription() ' As String
ErrorDescription = errDescription
End Property
Private Sub MergeInternal()
Dim sourceParaCount ' As Long
Dim targetParaCount ' As Long
Dim i ' As Long
Dim paraPair ' As ParagraphPair
sourceParaCount = docSource.Paragraphs.Count
targetParaCount = docTarget.Paragraphs.Count
If sourceParaCount > targetParaCount Then
i = sourceParaCount
Else
i = targetParaCount
End If
For i = i To 1 Step -1
Set paraPair = New ParagraphPair
paraPair.Init docSource.Paragraphs(i), docTarget.Paragraphs(i)
paraPair.Merge
Set paraPair = Nothing
Next
mergeCompleted = True
End Sub
Private Function GetPosTailNonControlChar(ByVal s) ' As Long
Const ASCII_CHAR_CODE_SPACE = 32
Dim i ' As Long
Dim code ' As Long
GetPosTailNonControlChar = 0
For i = Len(s) To 1 Step -1
code = AscW(Mid(s, i))
If code >= ASCII_CHAR_CODE_SPACE Then
' Non-control char found.
GetPosTailNonControlChar = i
Exit Function
End If
Next
End Function
End Class
'*********************************************************************
' ParagraphPair class
'*********************************************************************
Class ParagraphPair
Private paraSource ' As Word.Paragraph
Private paraTarget ' As Word.Paragraph
Private textSource ' As String
Private textTarget ' As String
Private textCleanedSource ' As String
Private textCleanedTarget ' As String
Private Sub Class_Initialize()
Set paraSource = Nothing
Set paraTarget = Nothing
End Sub
Private Sub Class_Terminate()
'
End Sub
' callable one-time only
Public Sub Init(sourceParagraph, targetParagraph)
If Not paraSource Is Nothing Or Not paraTarget Is Nothing Then
Err.Raise vbObjectError + 1, TypeName(Me), "This object is already initialized."
End If
Set paraSource = sourceParagraph
Set paraTarget = targetParagraph
textSource = paraSource.Range.Text
textTarget = paraTarget.Range.Text
textCleanedSource = Clean(textSource)
textCleanedTarget = Clean(textTarget)
End Sub
' target <- source
' return true if merge executed, otherwise retrn false.
Public Function Merge() ' As Boolean
Const wdCharacter = 1
Merge = False
If IsMergeable() Then
If IsListOutlineNumbering() And Right(textSource, 1) = vbCr Then
Dim r ' As Word.Range
Set r = paraSource.Range
r.MoveEnd wdCharacter, -1 ' replace carriage return
r.InsertAfter vbVerticalTab ' with vertical tab (soft-break without numbering)
paraTarget.Range.InsertBefore r
Else
paraTarget.Range.InsertBefore paraSource.Range
End If
Merge = True
End If
End Function
Private Function Clean(s) ' As String
Dim pos ' As Long
Clean = 0
pos = GetPosTailNonControlChar(s)
If pos > 0 Then
Clean = Left(s, pos)
End If
End Function
Private Function GetPosTailNonControlChar(s) ' As Long
Const ASCII_CHAR_CODE_SPACE = 32
Dim i ' As Long
Dim code ' As Long
GetPosTailNonControlChar = 0
For i = Len(s) To 1 Step -1
code = AscW(Mid(s, i))
If code >= ASCII_CHAR_CODE_SPACE Then
' Non-control char found.
GetPosTailNonControlChar = i
Exit Function
End If
Next
End Function
Private Function IsMergeable() ' As Boolean
IsMergeable = False
' is empty?
If Len(textCleanedSource) = 0 And Len(textCleanedTarget) = 0 Then
Exit Function
End If
' Do they have same contents?
If StrComp(textCleanedSource, textCleanedTarget, vbBinaryCompare) = 0 Then
Exit Function
End If
' OK, they are mergeable
IsMergeable = True
End Function
Private Function IsListOutlineNumbering() ' As Boolean
Const wdListOutlineNumbering = 4
Dim sourceOutlineNumbered ' As Boolean
Dim targetOutlineNumbered ' As Boolean
sourceOutlineNumbered = paraSource.Range.ListFormat.ListType = wdListOutlineNumbering
targetOutlineNumbered = paraTarget.Range.ListFormat.ListType = wdListOutlineNumbering
IsListOutlineNumbering = sourceOutlineNumbered And targetOutlineNumbered
End Function
End Class
'*********************************************************************
' This file should be saved in Unicode (UTF16-LE) after changing settings.
'*********************************************************************
Const OMEGAT_PROJECT_NAME = "your-project-folder-name"
Const WORD_DOCUMENT_NAME = "your-document.docx"
Const WINDOW_STYLE_HIDE = 0
Dim shell, parentDir, cmd, source, target
Set shell = WScript.CreateObject("WSCript.shell")
parentDir = GetParentDirName()
cmd = "CScript //nologo MergeDocs.vbs ""%source%"" ""%target%"""
source = parentDir & OMEGAT_PROJECT_NAME & "\source\" & WORD_DOCUMENT_NAME
target = parentDir & OMEGAT_PROJECT_NAME & "\target\" & WORD_DOCUMENT_NAME
cmd = Replace(cmd, "%source%", source)
cmd = Replace(cmd, "%target%", target)
shell.run cmd, WINDOW_STYLE_HIDE
Set shell = Nothing
Function GetParentDirName()
Dim scriptFullName: scriptFullName = WScript.ScriptFullName
Dim scriptName: scriptName = WScript.ScriptName
GetParentDirName = Left(scriptFullName, Len(scriptFullName) - Len(scriptName))
End Function
@yu-tang
Copy link
Author

yu-tang commented Sep 5, 2014

元スレはこちら。
https://groups.yahoo.com/neo/groups/OmegaT/conversations/topics/33190
操作対象のサンプルファイルは以下からダウンロード可能。
https://www.dropbox.com/s/dwwgfif6y7br741/Sample.docx?dl=0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment