Skip to content

Instantly share code, notes, and snippets.

Created February 12, 2020 07:16
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
class Candidate:
    """A single extraction result: a surface name plus the algorithm that found it."""

    def __init__(self, name, algorithm):
        """Store the candidate's name and the identifier of the producing algorithm.

        Args:
            name: The extracted text (for example ``"HOGE"``).
            algorithm: Label of the extractor that produced this candidate.
        """
        self.name = name
        self.algorithm = algorithm

    def as_dict(self):
        """Return a plain ``dict`` view of the candidate, suitable for ``json.dumps``.

        NOTE(review): the ``"name"`` entry was not visible in the damaged
        source and is reconstructed from the constructor signature - confirm.
        """
        return {
            "name": self.name,
            "algorithm": self.algorithm,
        }

    def __repr__(self):
        """Return a short debugging representation that shows only the name."""
        return f"<Candidate(name={self.name})>"
class DummyExtractor:
    """Placeholder extractor that ignores its input and returns fixed candidates."""

    def __init__(self):
        """Create the extractor; it holds no state."""

    def extract(self, sentence):
        """Return the two hard-coded candidates ``HOGE`` and ``FUGA``.

        Args:
            sentence: The text to analyse. It is not inspected.

        Returns:
            A new list of two ``Candidate`` objects tagged with the
            ``"dummy"`` algorithm.
        """
        return [
            Candidate(name="HOGE", algorithm="dummy"),
            Candidate(name="FUGA", algorithm="dummy"),
        ]

    def __repr__(self):
        """Return the class name, used when the pipeline steps are listed or logged."""
        return self.__class__.__name__
from abc import ABC, abstractmethod
class Postprocessor(ABC):
    """Base class for steps that transform the list of extracted candidates."""

    @abstractmethod
    def process(self, candidates):
        """Return the candidates that should be passed on to the next step.

        Subclasses must override this method.

        Args:
            candidates: The candidates produced by the previous step.

        Returns:
            The processed list of candidates.
        """
        raise NotImplementedError

    def __repr__(self):
        """Return the concrete class name, used when steps are listed or logged."""
        return self.__class__.__name__
class StopwordsFilter(Postprocessor):
    """Postprocessor that drops candidates whose name is a stop word."""

    def __init__(self):
        """Initialise the filter with its fixed stop-word set."""
        # A set gives O(1) membership checks in ``process``.
        self.stop_words = {"HOGE"}

    def process(self, candidates):
        """Return the candidates whose ``name`` is not in ``self.stop_words``.

        Args:
            candidates: Iterable of objects exposing a ``name`` attribute.

        Returns:
            A new list with the input order preserved and stop words removed.
        """
        return [cand for cand in candidates if cand.name not in self.stop_words]
import unicodedata
from abc import ABC, abstractmethod
class Preprocessor(ABC):
    """Base class for steps that transform the input text before extraction."""

    @abstractmethod
    def process(self, text):
        """Return the transformed input for the next step.

        Subclasses must override this method.

        Args:
            text: The input to transform. The worker in this file passes a
                list of sentences and feeds the return value to the next
                preprocessor, so implementations should return the same shape.

        Returns:
            The transformed input.
        """
        raise NotImplementedError

    def __repr__(self):
        """Return the concrete class name, used when steps are listed or logged."""
        return self.__class__.__name__
class UnicodeNormalizer(Preprocessor):
    """Preprocessor that applies Unicode NFKC normalization to every sentence."""

    def process(self, sentences):
        """Return a new list holding the NFKC-normalized form of each sentence.

        Args:
            sentences: Iterable of strings.

        Returns:
            A list of normalized strings in the same order as the input.
        """
        normalized = []
        for raw in sentences:
            normalized.append(unicodedata.normalize("NFKC", raw))
        return normalized
from logzero import logger
from extractor import DummyExtractor
from preprocessor import UnicodeNormalizer
from postprocessor import StopwordsFilter
from worker import ExtractorWorker
if __name__ == "__main__":
    # Build a sample pipeline from the components imported above.
    extractor = ExtractorWorker(description="sample")

    # NOTE(review): the damaged source only showed the constructor call.
    # The registrations and the log call below are reconstructed from the
    # otherwise unused imports (UnicodeNormalizer, DummyExtractor,
    # StopwordsFilter, logger) - confirm against the original script.
    extractor.add_preprocessor(UnicodeNormalizer())
    extractor.add_extractor(DummyExtractor())
    extractor.add_postprocessor(StopwordsFilter())

    logger.info(extractor.get_steps())
import json
from logzero import logger
class ExtractorWorker:
    """Run text through preprocessing, extraction and postprocessing steps.

    Steps are kept in three lists and executed in registration order.
    Intermediate results are written to the module-level ``logger`` at
    debug level.
    """

    def __init__(self, description=""):
        """Create an empty pipeline.

        Args:
            description: Free-form label reported by ``get_steps``.
        """
        self.preprocessors = []
        self.postprocessors = []
        self.extractors = []
        self._description = description

    def add_preprocessor(self, preprocessor):
        """Append a step whose ``process(sentences)`` runs before extraction."""
        self.preprocessors.append(preprocessor)

    def add_postprocessor(self, postprocessor):
        """Append a step whose ``process(candidates)`` runs after extraction."""
        self.postprocessors.append(postprocessor)

    def add_extractor(self, extractor):
        """Append an extractor whose ``extract(sentence)`` yields candidates."""
        self.extractors.append(extractor)

    def extract(self, text):
        """Run the full pipeline on one text and return the surviving candidates.

        Args:
            text: The input text. It is wrapped in a one-element list before
                preprocessing.

        Returns:
            The candidates left after every postprocessor has run.
        """
        sentences = self._preprocess([text])
        candidates = self._extract(sentences)
        logger.debug("Candidates:" + json.dumps([cand.as_dict() for cand in candidates], ensure_ascii=False))
        return self._post_process(candidates)

    def _preprocess(self, sentences):
        """Feed ``sentences`` through each preprocessor in turn, logging every stage."""
        logger.debug(json.dumps(sentences, ensure_ascii=False))
        for processor in self.preprocessors:
            sentences = processor.process(sentences)
            logger.debug(str(processor) + ":" + json.dumps(sentences, ensure_ascii=False))
        return sentences

    def _extract(self, sentences):
        """Collect the candidates every extractor finds in every sentence.

        Results are grouped by extractor first, then by sentence. Each
        ``extract`` call is expected to return an iterable of candidates,
        which is flattened into the result list.
        """
        extracted = []
        for extractor in self.extractors:
            for sentence in sentences:
                extracted.extend(extractor.extract(sentence))
        return extracted

    def _post_process(self, candidates):
        """Feed ``candidates`` through each postprocessor in turn, logging every stage."""
        for postprocessor in self.postprocessors:
            candidates = postprocessor.process(candidates)
            logger.debug(str(postprocessor) + ":" + json.dumps([cand.as_dict() for cand in candidates], ensure_ascii=False))
        return candidates

    def get_steps(self):
        """Return a summary of the pipeline: its description and the ``str()`` of each step."""
        return {
            "Description": self._description,
            "Preprocessing": [str(step) for step in self.preprocessors],
            "Extractors": [str(step) for step in self.extractors],
            "Postprocessing": [str(step) for step in self.postprocessors],
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment