Skip to content

Instantly share code, notes, and snippets.

@oliver-batey
Last active January 20, 2021 20:27
Show Gist options
  • Save oliver-batey/35f2f04cdc2170313a0620b09aa638a9 to your computer and use it in GitHub Desktop.
Save oliver-batey/35f2f04cdc2170313a0620b09aa638a9 to your computer and use it in GitHub Desktop.
Part 1 of the factory method pattern
import os
from docx import Document
class DocParser:
    """Parse a document with a parser chosen from the document's name."""

    def parse(self, document):
        """Parse ``document`` and return the selected parser's result.

        Args:
            document: Value handed first to ``get_format`` to pick a parser
                and then to that parser unchanged.

        Returns:
            Whatever the parser returned by ``get_format`` produces.
        """
        return get_format(document)(document)
def get_format(document):
    """Return the parser callable matching the extension of ``document``.

    Despite its name, this does not return the format itself: the extension
    is extracted with ``os.path.splitext`` and passed to ``get_parser``,
    whose result is returned.

    Args:
        document: Path whose extension selects the parser.

    Returns:
        The value returned by ``get_parser`` for the path's extension.
    """
    _, extension = os.path.splitext(document)
    return get_parser(extension)
def get_parser(format):
    """Return the parsing function registered for a file extension.

    Matching is case-insensitive, so ``'.TXT'`` and ``'.Docx'`` resolve to
    the same parsers as their lower-case forms.

    Args:
        format: File extension including the leading dot, e.g. ``'.txt'``.

    Returns:
        ``parse_txt`` for ``'.txt'`` or ``parse_docx`` for ``'.docx'``.

    Raises:
        ValueError: If the extension is not supported. The exception carries
            the value exactly as it was passed in.
    """
    # Only strings are normalised, so any other value still falls through to
    # the ValueError below instead of failing on a missing ``lower`` method.
    extension = format.lower() if isinstance(format, str) else format
    if extension == '.txt':
        return parse_txt
    if extension == '.docx':
        return parse_docx
    raise ValueError(format)
def parse_txt(document, encoding=None):
    """Read a text file and return its contents on a single line.

    Args:
        document: Path of the text file to read.
        encoding: Text encoding passed to ``open``. The default, ``None``,
            leaves the choice to ``open`` itself, which is what happens when
            the argument is omitted.

    Returns:
        The file contents with every newline replaced by a single space.
    """
    with open(document, 'r', encoding=encoding) as file:
        return file.read().replace('\n', ' ')
def parse_docx(document):
    """Return the paragraph text of a .docx document as one string.

    Args:
        document: Passed unchanged to ``docx.Document``.

    Returns:
        The ``text`` of each item in ``doc.paragraphs``, in iteration order,
        each followed by a single space. A non-empty result therefore ends
        with a trailing space, and a document with no paragraphs gives ``''``.
    """
    doc = Document(document)
    # join assembles the result in one pass rather than growing a string
    # with repeated concatenation; the output is the same.
    return ''.join(f'{para.text} ' for para in doc.paragraphs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment