Skip to content

Instantly share code, notes, and snippets.

@willismonroe
Last active June 3, 2018 05:44
Show Gist options
  • Save willismonroe/e3dbc9ba0ee834befae82fb641535783 to your computer and use it in GitHub Desktop.
Save willismonroe/e3dbc9ba0ee834befae82fb641535783 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import json, pprint
class ORACC_text_reader:
    """Render one text from an ORACC-style JSON document line by line.

    The constructor parses the JSON and locates a flat list of nodes at
    ``data['cdl'][0]['cdl'][i]['cdl'][0]['cdl']``.  Each ``output_*``
    method walks that list and returns a list of strings, one per line of
    the text:

    * a node with ``node == 'd'`` that carries a ``'label'`` starts a new
      line (the label becomes the line header);
    * a node with ``node == 'l'`` contributes one word to the current line.

    The first entry of every result holds whatever precedes the first
    labelled line; it is seeded with ``'o'`` when headers are enabled
    (presumably standing for the obverse -- confirm against the data).
    Every word is prefixed with a single space, so lines rendered with
    ``with_line_headers=False`` start with a space.
    """

    def __init__(self, json_string, DEBUG=False):
        """Parse *json_string* and locate the node list of the text.

        Args:
            json_string: JSON document as a string.  Invalid JSON raises
                ``json.JSONDecodeError`` (not caught here).
            DEBUG: when true, diagnostic dumps are printed to stdout.
        """
        self.DEBUG = DEBUG
        self.data = json.loads(json_string)
        # Start with an empty node list so the output methods stay usable
        # (and return just the seed line) when the document does not have
        # the expected nesting.
        self.text = []
        try:
            for node in self.data['cdl'][0]['cdl']:
                # No break: when several chunks carry a 'cdl' list, the
                # last one wins.
                if isinstance(node, dict) and 'cdl' in node:
                    self.text = node['cdl'][0]['cdl']
        except (KeyError, IndexError, TypeError):
            # Unexpected document shape: keep what was found so far.
            if self.DEBUG:
                pprint.pprint(self.data)

    def _collect_lines(self, render_word, with_line_headers):
        """Group rendered words into lines; shared by all ``output_*`` methods.

        Args:
            render_word: callable taking an ``'l'`` node and returning the
                string for that word, or ``None`` to contribute nothing.
            with_line_headers: prefix each line with its label.

        Returns:
            List of line strings, in document order.
        """
        output = []
        line = 'o' if with_line_headers else ''
        for node in self.text:
            kind = node['node']
            if kind == 'd' and 'label' in node:
                # A labelled 'd' node closes the current line and opens
                # the next one.
                output.append(line)
                line = node['label'] if with_line_headers else ''
            elif kind == 'l':
                word = render_word(node)
                if word is not None:
                    line += ' ' + word
        output.append(line)
        return output

    @staticmethod
    def _field_or_form(node, key):
        """Return ``node['f'][key]``, falling back to ``node['f']['form']``."""
        fields = node['f']
        return fields[key] if key in fields else fields['form']

    def output_translit(self, with_line_headers=True):
        """Return the lines built from each word's ``'frag'`` value."""
        return self._collect_lines(lambda node: node['frag'],
                                   with_line_headers)

    def output_norm(self, with_line_headers=True):
        """Return the lines built from ``f['norm']`` (else ``f['form']``)."""
        return self._collect_lines(
            lambda node: self._field_or_form(node, 'norm'),
            with_line_headers)

    def output_sense(self, with_line_headers=True):
        """Return the lines built from ``f['sense']`` (else ``f['form']``)."""
        return self._collect_lines(
            lambda node: self._field_or_form(node, 'sense'),
            with_line_headers)

    def output_cuneiform(self, with_line_headers=True):
        """Return the lines built from the ``'gdl_utf8'`` strings of each word.

        Words whose ``f['gdl']`` list is empty contribute nothing.
        """
        return self._collect_lines(self._cuneiform_word, with_line_headers)

    def _cuneiform_word(self, node):
        """Concatenate the ``'gdl_utf8'`` strings of one ``'l'`` node.

        Returns ``None`` when ``node['f']['gdl']`` is empty.  Diagnostics
        are printed only when ``self.DEBUG`` is set.
        """
        translit = node['frag']
        gdl = node['f']['gdl']

        if not gdl:
            if self.DEBUG:
                print("Can't process ✗")
                pprint.pprint(node)
                print()
            return None

        if len(gdl) == 1:
            sign = self._single_gdl_sign(gdl[0])
            if self.DEBUG:
                print("Single gdl ✓ {} = {}".format(translit, sign))
            return sign

        parts = []
        for el in gdl:
            if 'gdl_utf8' in el:
                parts.append(el['gdl_utf8'])
            elif 'seq' in el:
                parts.extend(item['gdl_utf8'] for item in el['seq'])
            elif 'group' in el:
                parts.extend(item['gdl_utf8'] for item in el['group'])
            elif self.DEBUG:
                # Unknown element shape: skip it, but report it together
                # with what has been collected so far.
                print("Error with multi-gdl ✗ {} = {}".format(
                    translit, ''.join(parts)))
                pprint.pprint(gdl)
        sign = ''.join(parts)
        if self.DEBUG:
            print("Multi-gdl ✓ {} = {}".format(translit, sign))
        return sign

    @staticmethod
    def _single_gdl_sign(element):
        """Return the sign string for a word made of a single gdl element.

        A ``'group'`` element is flattened one level; a group member
        without its own ``'gdl_utf8'`` contributes only the first entry of
        its ``'seq'``.
        """
        if 'group' not in element:
            return element['gdl_utf8']
        return ''.join(
            member['gdl_utf8'] if 'gdl_utf8' in member
            else member['seq'][0]['gdl_utf8']
            for member in element['group']
        )
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@willismonroe
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment