This is a literate doctest. Run ``python -m doctest -v examples.rst`` to test it.
>>> from vocab_utils import group_tokens_by
>>> from gnt_data import ChunkType, TokenType
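For orientation, the snippet below shows how these imports might fit together. It is not run as part of the doctest: the call assumes a ``group_tokens_by(token_type, chunk_type)`` signature, which is a guess from the imported names, not confirmed by the source::

    groups = group_tokens_by(TokenType.lemma, ChunkType.book)  # signature assumed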
BOOKS = [
    'MAT',
    'MRK',
    'LUK',
    'JHN',
    'ACT',
    'ROM',
    '1CO',
    '2CO',
    'GAL',
    # ... continues through the remaining NT books to 'REV'
]
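pysblgnt numbers these books 1-27 in the same canonical order (compare ``range(1, 28)`` in the next snippet), so a code-to-number mapping can be derived directly from the list. ``BOOK_NUMS`` is a hypothetical helper, not part of the source::

    # hypothetical helper: SBLGNT book code -> pysblgnt book number
    BOOK_NUMS = {code: num for num, code in enumerate(BOOKS, start=1)}
    assert BOOK_NUMS['MAT'] == 1  # Matthew is book 1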
from collections import Counter, defaultdict
from pysblgnt import morphgnt_rows
from greek_normalisation.utils import nfc
import sys


def get_data():
    data = defaultdict(dict)
    books = range(1, 28)  # the 27 NT books, numbered 1-27 by pysblgnt
    for i in books:
        for row in morphgnt_rows(i):
            ...
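The loop body is truncated above. As a hedged sketch of what such an accumulator might do, using the imports already in scope and assuming the standard py-sblgnt row keys ``'lemma'`` and ``'norm'`` (this is illustrative, not the original body)::

    # sketch: count normalised surface forms per lemma across the whole GNT
    def get_data_sketch():
        data = defaultdict(Counter)
        for book_num in range(1, 28):
            for row in morphgnt_rows(book_num):
                data[nfc(row['lemma'])][nfc(row['norm'])] += 1
        return data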
import sys
import re
import mistletoe


def read_to_dict(fpath, encoding='UTF-8'):
    lines = {}
    with open(fpath, 'r', encoding=encoding) as file:
        for line in file:
            num, content = line.split(' ', maxsplit=1)
            lines[num] = content
    return lines
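``read_to_dict`` keys each line by its leading reference number. A hypothetical usage, with a filename that echoes the section keys listed further down::

    # e.g. a file whose lines look like '22.0 <section text>'
    # yields {'22.0': '<section text>\n', ...}
    sections = read_to_dict('013-shepherd.txt')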
from greek_normalisation.utils import nfc, strip_last_accent_if_two, grave_to_acute
import re
from ordering import next_best
import sys
import glob
import json
from pysblgnt import morphgnt_rows
from collections import defaultdict
# filename-section OR filename-chapter.section (for longer works, e.g. the Shepherd)
013-shepherd.txt-22.0
013-shepherd.txt-19.0
013-shepherd.txt-20.0
013-shepherd.txt-21.0
013-shepherd.txt-24.0
013-shepherd.txt-12.0
013-shepherd.txt-4.0
013-shepherd.txt-9.0
013-shepherd.txt-3.0
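Splitting such a key back into filename and reference only needs the last hyphen; ``parse_key`` is a hypothetical helper, not in the source::

    # hypothetical helper: '013-shepherd.txt-22.0' -> ('013-shepherd.txt', '22.0')
    def parse_key(key):
        filename, ref = key.rsplit('-', maxsplit=1)
        return filename, ref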
from greek_normalisation.utils import nfc, grave_to_acute


def layout_paradigm(forms, row_identifiers, column_identifiers):
    """
    forms: [(form, parse)]
    ...identifiers = [predicate: parse -> bool]
    """
    output = []
    for rfn in row_identifiers:
        row = []
        for cfn in column_identifiers:
            ...
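The inner loop is truncated above. A plausible completion, on the assumption that each identifier is a predicate over the parse code and each cell collects every form satisfying both its row and column predicates (a sketch, not the original implementation)::

    # sketch: build the paradigm grid cell by cell
    def layout_paradigm_sketch(forms, row_identifiers, column_identifiers):
        output = []
        for rfn in row_identifiers:
            row = [
                [form for form, parse in forms if rfn(parse) and cfn(parse)]
                for cfn in column_identifiers
            ]
            output.append(row)
        return output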
from gnt_data import get_tokens, TokenType
from collections import defaultdict, namedtuple
from tabulate import tabulate
from statistics import mean, median

gnt_lemmas = get_tokens(TokenType.lemma)
data = defaultdict(list)
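Given those imports, a frequency table is the obvious first report. A minimal sketch, assuming ``get_tokens`` returns a flat list of lemma strings::

    # sketch: ten most frequent lemmas in the GNT
    from collections import Counter

    lemma_counts = Counter(gnt_lemmas)
    print(tabulate(lemma_counts.most_common(10), headers=['lemma', 'count']))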
from gnt_data import ChunkType, TokenType, get_tokens, get_tokens_by_chunk
from collections import Counter


def get_stats(token_type):
    ALL_GNT = get_tokens(token_type)
    BOOKS = get_tokens_by_chunk(token_type, ChunkType.book)
    ...
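A hedged sketch of where ``get_stats`` might go from here, assuming ``get_tokens_by_chunk`` returns a mapping from book to its token list (the aggregation shown is illustrative, not the source's)::

    # sketch: overall frequencies plus per-book size statistics
    from statistics import mean, median

    def get_stats_sketch(token_type):
        all_gnt = get_tokens(token_type)
        books = get_tokens_by_chunk(token_type, ChunkType.book)
        freq = Counter(all_gnt)
        sizes = [len(tokens) for tokens in books.values()]
        return freq, mean(sizes), median(sizes)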
from pathlib import Path
from greek_normalisation.utils import nfc
from greek_accentuation.characters import add_breathing, Breathing, Accent, add_diacritic
import re
import sys

FIND = [x for x in "α ε η υ ο ω ι".split(' ') if x]  # single vowels
DIPTH = [x for x in "ει αι οι ου αυ ευ ηυ".split(' ') if x]  # diphthongs
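One way these inventories get used: a regex whose alternation tries the diphthongs before the single vowels, so the longest nucleus wins. A minimal sketch, not the source's own matching logic::

    # sketch: find the first vowel nucleus, preferring diphthongs
    NUCLEUS_RE = re.compile('|'.join(DIPTH + FIND))

    assert NUCLEUS_RE.match('ευ').group() == 'ευ'   # diphthong, not bare ε
    assert NUCLEUS_RE.search('λογος').group() == 'ο'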