Skip to content

Instantly share code, notes, and snippets.

@ogroleg
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ogroleg/df951cea0fe33e5e8356 to your computer and use it in GitHub Desktop.
Save ogroleg/df951cea0fe33e5e8356 to your computer and use it in GitHub Desktop.
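Merge .docx files section by section: each section starts at a Heading1 or Heading2 paragraph, and sections are written out in order of the number found in their heading text.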
import glob
from docx import Document as Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.section import CT_SectPr
def temp2(x):
    """Return ``x.style`` when *x* is exactly a ``CT_P`` element, else None."""
    if x.__class__ == CT_P:
        return x.style
    return None


def temp3(x):
    """Return *x* unchanged, or '' when it is falsy or the string 'None'."""
    if x != 'None' and x:
        return x
    return ''
def _store_section(num, elements):
    """Record *elements* under *num* in the module-level ``res`` mapping.

    Raises:
        ValueError: if *num* is already a key of ``res``.
    """
    if num in res:
        raise ValueError("Key is already present:" + str(num))
    res[num] = elements


def proceed_doc(d):
    """Split one document body into numbered sections and store them in ``res``.

    *d* is a sequence of body child elements. An element whose ``temp2``
    style is 'Heading1' or 'Heading2' opens a new section; the section key is
    the number ``get_num`` extracts from the concatenated ``.text`` of the
    heading's children. Elements that follow are appended to the open
    section. Elements before the first numbered heading, and sections whose
    heading yields no number, are skipped.

    Raises:
        ValueError: if a section number is already present in ``res``.
    """
    curr_num, curr_ps = None, []
    for element in d:
        if temp2(element) in ('Heading1', 'Heading2'):
            # Compare against None explicitly: a heading numbered 0 is a
            # valid key and must not be dropped by a truthiness test.
            if curr_num is not None:
                _store_section(curr_num, curr_ps)
            heading_text = ''.join(temp3(child.text) for child in element)
            curr_num = get_num(heading_text)
            curr_ps = [element]
        elif curr_num is not None:
            curr_ps.append(element)
    # Flush the section that was still open when the body ended.
    if curr_num is not None:
        _store_section(curr_num, curr_ps)
def get_num(e):
    """Return the first run of decimal digits in *e* as an int.

    Scanning stops at the first non-digit character that follows the run,
    or at the end of the string, so trailing numbers such as "Chapter 12"
    are recognised.

    Returns:
        The parsed integer, or None when *e* contains no decimal digit.
    """
    digits = ''
    for ch in e:
        # isdecimal() accepts exactly the characters int() can parse;
        # isdigit() also accepts e.g. superscripts, which make int() raise.
        if ch.isdecimal():
            digits += ch
        elif digits:
            break
    return int(digits) if digits else None
def main():
    """Merge every '*.docx' in the current directory into 'res.docx'.

    Loads the body children of each matching file (skipping 'res.docx'
    itself), lets ``proceed_doc`` group them into numbered sections in the
    global ``res`` mapping, then inserts the sections into a new document in
    ascending key order and saves it as 'res.docx'.
    """
    global res, r
    bodies = [
        Document(path).element.body.getchildren()
        for path in glob.glob('*.docx')
        if path != 'res.docx'
    ]
    res, r = {}, Document()
    for body in bodies:
        proceed_doc(body)
    # Insertion starts at index 1, exactly as before.
    # NOTE(review): confirm this index places content where intended
    # relative to the new document's existing body children.
    position = 1
    for number in sorted(res):
        for element in res[number]:
            r.element.body.insert(position, element)
            position += 1
    r.save('res.docx')


# Run the merge only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment