Skip to content

Instantly share code, notes, and snippets.

Created June 19, 2014 05:36
Show Gist options
  • Save copyleftdev/5b3d9c0c11ab7cd67940 to your computer and use it in GitHub Desktop.
Save copyleftdev/5b3d9c0c11ab7cd67940 to your computer and use it in GitHub Desktop.
docx generator
# pip install lxml PIL
from docx import *
import os
import tarfile
import time
import shutil
# Bulk .docx generator: writes a batch of small Word documents into a working
# directory, packs that directory into a timestamped .tar.gz archive, and then
# removes the working directory.

# Naming and output locations for the generated set.
fileSeedName = 'clusterTesting_50k_set_'
dataStorageDirectory = 'data'
timeStamp = int(time.time())
tarFilename = fileSeedName + str(timeStamp) + ".tar.gz"

# Number of documents to generate.
# NOTE(review): the seed name says "50k" but the count is 100000 -- confirm
# which one is intended.
documentCount = 100000

# Package parts that do not vary between documents are built once up front.
# These assignments rebind the helper names pulled in by the star import to
# their results, so the helpers cannot be called again after this point.
relationships = relationshiplist()
appprops = appproperties()
contenttypes = contenttypes()
websettings = websettings()
wordrelationships = wordrelationships(relationships)

# Core-property values shared by every generated document.
title = 'Cluster Testing Document'
subject = 'A test file generated by SQA Don Johnson (Written in Python)'
creator = 'Don Johnson'
keywords = ['cluster', 'testing', 'python']

# Create the data directory if it does not exist.
if not os.path.exists(dataStorageDirectory):
    os.makedirs(dataStorageDirectory)

# File generator
for x in range(0, documentCount):
    document = newdocument()
    body = document.xpath('/w:document/w:body', namespaces=nsprefixes)[0]

    # Append two headings and a paragraph.
    body.append(heading("Encasereview Clustering Test Document", 1))
    body.append(heading("Legal Car Cat <= Search Vectors", 2))
    body.append(paragraph('This file was generated using python, no need to COM, .NET, Java. Keeping it simple since 90s :).'))

    # Built per document, as in the original script.
    coreprops = coreproperties(title=title, subject=subject, creator=creator, keywords=keywords)

    # Save the document. fileSeedName already ends with "_", so the generated
    # names contain a double underscore; kept as-is to preserve file names.
    outputPath = os.path.join(dataStorageDirectory, '{}_{}.docx'.format(fileSeedName, x))
    savedocx(document, coreprops, appprops, contenttypes, websettings, wordrelationships, outputPath)

# Pack the generated files into a gzip-compressed tarball. The context manager
# guarantees the archive is flushed and closed even if adding a file fails.
# The directory is stored in the archive under the tarball's own file name,
# matching the original arcname.
with tarfile.open(tarFilename, "w:gz") as tar:
    tar.add(dataStorageDirectory, arcname=tarFilename)

# Remove the data directory now that its contents are archived.
shutil.rmtree(dataStorageDirectory)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment