Skip to content

Instantly share code, notes, and snippets.

@0187773933
Last active June 14, 2020 23:07
Show Gist options
  • Save 0187773933/3e7189aa1db7038593916f61cd7abfbd to your computer and use it in GitHub Desktop.
Save 0187773933/3e7189aa1db7038593916f61cd7abfbd to your computer and use it in GitHub Desktop.
Microsoft Word Progress Bar on Word Count
import sys, re, os, argparse
from docx import Document
import subprocess as sub
import time
from tqdm import tqdm
from pprint import pprint
import re
# https://raw.githubusercontent.com/SyntaxOverflowByte/word_count/master/word_count.py
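# Requires python-docx (imported as `docx`) and tqdm. Do not install the
# unrelated legacy `docx` package: it shadows python-docx and breaks the import.
# python3 -m pip install python-docx tqdm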
class WordCount:
    """Poll a Word document and show its word count on a tqdm progress bar.

    Recognised keys of the ``options`` dict:
        file_path: path of the .docx file to watch (required).
        word_count: target number of words, used as the bar total
            (defaults to 500).
        time_interval: seconds to sleep between two polls (defaults to 3).

    Constructing an instance starts the polling loop immediately, so the
    constructor does not return until ``checking`` becomes False (for
    example via ``stop_check_interval`` called from another thread).
    """

    def __init__(self, options=None):
        """Store the options, create the progress bar and start polling.

        Raises:
            ValueError: if ``options`` has no "file_path" key.
        """
        # Work on a copy: a shared ``{}`` default (or the caller's dict)
        # must not be mutated when the defaults below are filled in.
        self.options = dict(options) if options else {}
        if "file_path" not in self.options:
            # __init__ may only return None; returning False here used to
            # surface as an unrelated TypeError instead of a clear message.
            raise ValueError("you have to pass a file_path")
        self.options.setdefault("word_count", 500)
        self.options.setdefault("time_interval", 3)
        self.bar = tqdm(total=int(self.options["word_count"]), desc="Word Count")
        self.checking = True
        self.last_update_count = 0
        self.start_check_interval()

    def load_document(self):
        """(Re)load the document from ``options["file_path"]``."""
        self.document = Document(self.options["file_path"])

    def get_word_count_update(self):
        """Recount the words of the loaded document and refresh the bar.

        Sets ``words``, ``current_word_count`` and ``percent_complete``.
        The bar is only touched when the count changed since the last call.
        """
        words = []
        # Iterate the paragraphs once; str.split() with no argument splits
        # on any run of whitespace (spaces, tabs, line breaks) and never
        # yields empty strings.
        for paragraph in self.document.paragraphs:
            words.extend(paragraph.text.split())
        self.words = words
        self.current_word_count = len(words)
        self.percent_complete = (
            (self.current_word_count / self.options["word_count"]) * 100
        )
        if self.current_word_count != self.last_update_count:
            # Move the existing bar to the absolute count instead of
            # creating a new bar per change; this also handles the count
            # going down when text is deleted.
            self.bar.n = self.current_word_count
            self.bar.refresh()
            self.last_update_count = self.current_word_count

    def start_check_interval(self):
        """Reload and recount every ``time_interval`` seconds while ``checking``."""
        while self.checking:
            self.load_document()
            self.get_word_count_update()
            time.sleep(self.options["time_interval"])

    def stop_check_interval(self):
        """Ask the polling loop to exit after its current iteration."""
        self.checking = False
if __name__ == '__main__':
    # Command line: <input_file> <word_count>
    cli = argparse.ArgumentParser(
        description='Counts the words in a Microsoft Word document.'
    )
    cli.add_argument(
        'input_file',
        action='store',
        type=str,
        help='Enter the name of the file you want to perform the word count on.',
    )
    cli.add_argument('word_count', action='store', type=int)
    parsed = cli.parse_args()

    # Map the parsed arguments onto the option keys WordCount reads.
    settings = {
        "file_path": parsed.input_file,
        "word_count": parsed.word_count,
    }
    # The WordCount constructor starts the polling loop itself.
    word_counter = WordCount(settings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment