Skip to content

Instantly share code, notes, and snippets.

@cindygis
Created September 29, 2015 08:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cindygis/46d31815451e19836776 to your computer and use it in GitHub Desktop.
Save cindygis/46d31815451e19836776 to your computer and use it in GitHub Desktop.
Writes the contents of all rows from multiple tables in a Word document to a spreadsheet.
import docx
import xlwt
# Copy every table row of a Word document into a single worksheet of a
# legacy .xls workbook: one header row, followed by the data rows of all tables.
doc = r"C:\Some\Arb\Folder\input.docx"
xls = r"C:\Some\Arb\Folder\output.xls"

document = docx.Document(doc)
book = xlwt.Workbook()
cur_sheet = book.add_sheet("Tables")
row_num = 0  # Sheet row most recently written to; the header goes in row 0

tables = document.tables  # Get all the tables in the docx

# Get the header row from the 1st table's 1st row. Guard the indexing so a
# document without tables (or with an empty first table) does not raise
# IndexError; in that case an empty sheet is saved.
if tables and len(tables[0].rows) > 0:
    for index, cell in enumerate(tables[0].rows[0].cells):
        cur_sheet.write(row_num, index, cell.text)

for table in tables:
    for row in table.rows[1:]:  # Skip the repeating header row of each table
        row_num += 1
        for index, cell in enumerate(row.cells):
            # Test the cell's text rather than the cell object: comparing the
            # object itself to '' is always unequal, so blank cells were
            # never actually skipped.
            text = cell.text.strip()
            if text:
                cur_sheet.write(row_num, index, text)

book.save(xls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment