Skip to content

Instantly share code, notes, and snippets.

@wphicks
Created July 10, 2017 20:06
Show Gist options
  • Save wphicks/991a085174b45720a4255987c290546d to your computer and use it in GitHub Desktop.
Save wphicks/991a085174b45720a4255987c290546d to your computer and use it in GitHub Desktop.
A basic command-line tool for determining the word count of Open Document Text (.odt and .fodt) files
#!/usr/bin/env python3
import os
import sys
import argparse
import zipfile
import shutil
import tempfile
import xml.etree.ElementTree as ET
def count_fodt(filename):
    """Return the number of whitespace-separated words in an ODF XML document.

    Only text below the ``office:body`` element is counted, so that document
    metadata and application settings stored in a flat ``.fodt`` file do not
    inflate the result. If the root has no ``office:body`` child, all text in
    the document is counted instead.

    Args:
        filename: Path to the XML file, or an open file object; both are
            accepted by ``xml.etree.ElementTree.parse``.

    Returns:
        The total word count as an ``int``.

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
    """
    office_ns = "urn:oasis:names:tc:opendocument:xmlns:office:1.0"
    root = ET.parse(filename).getroot()

    # Compare against None explicitly: an Element's truth value reflects
    # whether it has children, not whether it was found.
    body = root.find("{{{0}}}body".format(office_ns))
    scope = root if body is None else body

    # NOTE: each text fragment is split on its own, so a word that spans
    # several inline elements is counted once per fragment.
    return sum(len(text.split()) for text in scope.itertext())
def count_odt(filename):
    """Return the word count of a zipped Open Document Text (``.odt``) file.

    The document text lives in the ``content.xml`` member of the archive.
    That member is parsed directly from the zip stream rather than being
    copied to a named temporary file first; reopening a still-open
    ``NamedTemporaryFile`` by name is not portable (it fails on Windows).

    Args:
        filename: Path to the ``.odt`` archive.

    Returns:
        The word count reported by ``count_fodt`` for ``content.xml``.

    Raises:
        zipfile.BadZipFile: If ``filename`` is not a valid zip archive.
        KeyError: If the archive has no ``content.xml`` member.
    """
    with zipfile.ZipFile(filename) as odt_file:
        with odt_file.open("content.xml") as content_file:
            # count_fodt passes its argument to ET.parse, which accepts an
            # open file object as well as a path.
            return count_fodt(content_file)
if __name__ == "__main__":
    # Command-line interface: one positional argument naming the document.
    cli = argparse.ArgumentParser(
        description='Count words in Open Document text file'
    )
    cli.add_argument("filename", help="Name of odt or fodt file")

    # Invoked with no arguments at all: show the full help text and signal
    # failure instead of argparse's terse usage error.
    if len(sys.argv) == 1:
        cli.print_help()
        sys.exit(1)

    options = cli.parse_args()

    # Choose the counter from the (case-insensitive) file extension; anything
    # other than ".fodt" is treated as a zipped .odt archive.
    _, extension = os.path.splitext(options.filename)
    counter = count_fodt if extension.lower() == ".fodt" else count_odt
    print(counter(options.filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment