I hereby claim:
- I am jtauber on github.
- I am jtauber (https://keybase.io/jtauber) on keybase.
- I have a public key ASBKXRd38Pg3fZKJJkLQ9TG3sLxxs17UAcX1zhjDiL3cpQo
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
license: mit |
# Unicode combining marks for the three Greek accents handled below.
VARIA = "\u0300"
OXIA = "\u0301"
PERISPOMENI = "\u0342"

# Characters removed by strip_accents(); every other combining mark is kept.
ACCENTS = [VARIA, OXIA, PERISPOMENI]


def strip_accents(s):
    """Return *s* with every character listed in ``ACCENTS`` removed.

    The string is first decomposed with NFD so that each accent becomes a
    separate combining character, the characters in ``ACCENTS`` are dropped,
    and the remainder is recomposed with NFKC.

    Because the final step is NFKC rather than NFC, compatibility characters
    in *s* are also folded to their compatibility equivalents.
    """
    # The generator expression previously contained a stray ``return``
    # (a SyntaxError) and the call was left unclosed; both are fixed here.
    return unicodedata.normalize("NFKC", "".join(
        c for c in unicodedata.normalize("NFD", s) if c not in ACCENTS
    ))
### Strip specific accents (U+0300, U+0301, U+0342) and leave all other combining marks in place
def strip_accents(w, accents=("\u0300", "\u0301", "\u0342")):
    """Return *w* with the given combining accents removed.

    The word is decomposed with NFD so that each accent is a separate
    combining character, every character found in *accents* is dropped, and
    the result is recomposed with NFC. Combining marks that are not listed
    in *accents* are preserved.

    Args:
        w: The string to process.
        accents: Collection of combining characters to remove. Defaults to
            U+0300, U+0301 and U+0342, the three characters the function
            always stripped before this parameter existed.

    Returns:
        The NFC-normalized string without the listed accents.
    """
    return unicodedata.normalize("NFC", "".join(
        ch
        for ch in unicodedata.normalize("NFD", w)
        if ch not in accents
    ))
# Opens the file with the given filename for reading and puts the resultant | |
# file object in the variable `f`. | |
f = open("OCR Output linebreaks removed.txt") | |
# `f.read()` reads the file and returns a string. | |
# `.split()` splits that string on whitespace and returns a list of strings. | |
# `for A in B:` iterates over the list B and runs the indented block with each | |
# list item in the variable A. | |
for token in f.read().split(): |
#!/usr/bin/env python3 | |
import argparse | |
import collections | |
import glob | |
# Command-line interface: two positional arguments, each a comma-separated
# list of column names, plus an optional -v/--verbose flag.
parser = argparse.ArgumentParser(
    description=(
        "count (and optionally list) the entries where the determinant "
        "columns do not functionally determine the dependent columns."
    ),
)
parser.add_argument(
    "-v", "--verbose", help="output full results", action="store_true",
)
parser.add_argument("determinant", help="comma-separated list of columns")
parser.add_argument("dependent", help="comma-separated list of columns")
#!/usr/bin/env python | |
import sys | |
import unicodedata | |
# Normalize the file named by the first command-line argument to NFKC and
# write the result to standard output as UTF-8.
#
# The file is read as bytes and decoded explicitly: calling ``.decode`` on a
# line read in text mode raises AttributeError on Python 3. Output is written
# as bytes too, so the UTF-8 encoding does not depend on the locale of stdout.
# On Python 2, ``sys.stdout`` has no ``buffer`` attribute and accepts byte
# strings directly, hence the ``getattr`` fallback.
out = getattr(sys.stdout, "buffer", sys.stdout)
with open(sys.argv[1], "rb") as f:
    for raw_line in f:
        text = unicodedata.normalize("NFKC", raw_line.decode("utf-8"))
        out.write(text.encode("utf-8"))
Rev 7.7
ἐκ φυλῆς Λευὶ δώδεκα χιλιάδες,
Rev 7.5
ἐκ φυλῆς Ἰούδα δώδεκα χιλιάδες ἐσφραγισμένοι,
#!/usr/bin/env python3 | |
from collections import defaultdict | |
from pysblgnt import morphgnt_rows | |
count_by_item = defaultdict(int) | |
total_item_count = 0 | |
for book_num in range(1, 28): |