Skip to content

Instantly share code, notes, and snippets.

@Sirsirious
Last active February 5, 2020 17:22
Show Gist options
  • Save Sirsirious/214b9ea2f2ce16e89004a0ee8192184c to your computer and use it in GitHub Desktop.
Save Sirsirious/214b9ea2f2ce16e89004a0ee8192184c to your computer and use it in GitHub Desktop.
The main parts of our structure.
class Document:
    """Top-level container for a text and the sentences derived from it."""

    def __init__(self, document_text) -> None:
        """Store the raw text and split it into sentences.

        Args:
            document_text: The full document text. It is kept unchanged in
                ``raw`` and handed to ``sentencize``.
        """
        self.raw = document_text
        # Whatever ``sentencize`` returns for the raw text is stored as-is.
        self.sentences = sentencize(self.raw)
        # NOTE(review): presumably a cursor used by iteration code in the
        # elided part of the class -- confirm against the full source.
        self._index = 0

    # [...] remainder of the class is elided in the original snippet
class Sentence:
    """A span of a document string, together with the tokens inside it."""

    def __init__(self, start_position, end_position, raw_document_reference) -> None:
        """Record the span boundaries and tokenize the text they delimit.

        Args:
            start_position: Start offset of the sentence in the document
                string; coerced with ``int()``.
            end_position: End offset (used as an exclusive slice bound);
                coerced with ``int()``.
            raw_document_reference: The document string the offsets index
                into. Only a reference is stored, not a copy of the span.
        """
        self.start_pos = int(start_position)
        self.end_pos = int(end_position)
        self._document_string = raw_document_reference
        # Neighbour links start out unset.
        # NOTE(review): presumably filled in by the code that builds the
        # sentence list -- confirm against the caller.
        self.next_sentence = None
        self.previous_sentence = None
        # Tokenize only this sentence's slice of the document string.
        self.tokens = tokenize(self._document_string[self.start_pos:self.end_pos])
        # NOTE(review): presumably a cursor used by iteration code in the
        # elided part of the class -- confirm against the full source.
        self._index = 0

    # [...] remainder of the class is elided in the original snippet
class Token:
    """A span of a sentence string, with links to neighbouring tokens."""

    def __init__(self, start_position, end_position, raw_sentence_reference,
                 SOS: bool = False, EOS: bool = False) -> None:
        """Record the span boundaries, the source string and the marker flags.

        Args:
            start_position: Start offset of the token; coerced with ``int()``.
            end_position: End offset of the token; coerced with ``int()``.
            raw_sentence_reference: The string the offsets refer to
                (presumably the enclosing sentence's text -- confirm). Only
                a reference is stored.
            SOS: Flag stored on the token; presumably marks a
                start-of-sentence token -- confirm.
            EOS: Flag stored on the token; presumably marks an
                end-of-sentence token -- confirm.
        """
        self.start_pos = int(start_position)
        self.end_pos = int(end_position)
        self._sentence_string = raw_sentence_reference
        # Neighbour links start out unset.
        # NOTE(review): presumably filled in by the tokenizer -- confirm.
        self.next_token = None
        self.previous_token = None
        self.SOS = SOS
        self.EOS = EOS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment