Last active
January 20, 2021 20:27
-
-
Save oliver-batey/35f2f04cdc2170313a0620b09aa638a9 to your computer and use it in GitHub Desktop.
Part 1 of the factory method pattern
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from docx import Document | |
class DocParser:
    """Entry point that parses a document with a format-specific parser."""

    def parse(self, document):
        """Return the result of the parser selected for ``document``.

        The parser callable is obtained from ``get_format(document)`` and is
        then invoked with ``document`` as its only argument.
        """
        return get_format(document)(document)
def get_format(document):
    """Return the parser callable matching the extension of ``document``.

    The extension (including its leading dot) is extracted with
    ``os.path.splitext`` and handed to ``get_parser``.
    """
    _, extension = os.path.splitext(document)
    return get_parser(extension)
def get_parser(format):
    """Return the parsing function registered for a file extension.

    Args:
        format: File extension including the leading dot, e.g. ``'.txt'``.
            Matching is case-insensitive, so ``'.TXT'`` and ``'.Docx'`` are
            accepted as well.

    Returns:
        ``parse_txt`` for ``'.txt'`` or ``parse_docx`` for ``'.docx'``.

    Raises:
        ValueError: If no parser is available for ``format``.
    """
    # Extensions taken from real file names often differ only in case
    # ("REPORT.DOCX"); normalise before comparing. Non-string values are left
    # untouched so they still end up in the ValueError below.
    extension = format.lower() if isinstance(format, str) else format
    if extension == '.txt':
        return parse_txt
    if extension == '.docx':
        return parse_docx
    raise ValueError(f'Unsupported document format: {format!r}')
def parse_txt(document, encoding='utf-8'):
    """Read a plain-text file and return its content as a single line.

    Args:
        document: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The file content with every newline replaced by a single space.
    """
    with open(document, 'r', encoding=encoding) as file:
        return file.read().replace('\n', ' ')
def parse_docx(document):
    """Return the paragraph text of a ``.docx`` document as one string.

    Args:
        document: Passed unchanged to ``docx.Document`` to open the file.

    Returns:
        The text of every entry in ``doc.paragraphs``, in order, each
        followed by a single space (so a non-empty result ends with a space).
    """
    doc = Document(document)
    # Build the result in one pass instead of growing a string with ``+=``.
    return ''.join(f'{para.text} ' for para in doc.paragraphs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment