Skip to content

Instantly share code, notes, and snippets.

@pgtwitter
Last active June 27, 2024 02:31
Show Gist options
  • Save pgtwitter/a954df03f46f8532300480abcfd4cfa8 to your computer and use it in GitHub Desktop.
Save pgtwitter/a954df03f46f8532300480abcfd4cfa8 to your computer and use it in GitHub Desktop.
# %%
import stanza
import graphviz as gv
# %%
# https://qiita.com/ytaki0801/items/b8d7d4c778fe98966ce5
def p_lex(p):
    """Split S-expression-like text into a flat list of tokens.

    Each of the characters ``( ) [ ] { } " ,`` becomes a token of its own;
    everything else is split on whitespace.
    """
    # Pad every delimiter with spaces in one pass so a plain split() isolates it.
    padding = {ord(mark): f" {mark} " for mark in '()[]{}",'}
    return p.translate(padding).split()
def p_syn(p):
    """Parse one expression from the front of token list *p*, consuming it.

    A plain token is returned as-is.  An opening ``(``, ``{``, ``[`` or ``"``
    starts a group: expressions are parsed recursively until the matching
    closing token is reached, and the group is returned as a list.  The
    tokens that were read (including the closing one) are removed from *p*.
    """
    openers = '({["'
    closers = ')}]"'

    head = p[0]
    del p[0]

    # Position of the opener doubles as the index of its closing partner.
    slot = openers.find(head)
    if slot < 0:
        return head

    closing = closers[slot]
    group = []
    while p[0] != closing:
        group.append(p_syn(p))
    del p[0]  # drop the closing token
    return group
# %%
# One-time setup: uncomment to download the Japanese ('ja') models.
# NOTE(review): the pipeline below loads models from 'Stanza', so the download
# presumably needs to target the same directory — confirm.
# stanza.download('ja')
# Build the Japanese pipeline, reading its models from the 'Stanza' directory.
nlp = stanza.Pipeline('ja', model_dir='Stanza')
# Annotate a two-sentence sample text (roughly: "Barack Obama was born in
# Hawaii. Took office as president in 2008.").
doc = nlp("バラク・オバマはハワイで生まれた。 2008年に大統領に就任。")
# Debug aid: uncomment to print the dependency relations of the first sentence.
# doc.sentences[0].print_dependencies()
# %%
# Characters that are special inside a Graphviz record label (or inside the
# double-quoted DOT string carrying it), mapped to their backslash-escaped form.
_RECORD_ESCAPES = str.maketrans({ch: "\\" + ch for ch in '\\{}|<>"'})


def _record_text(value):
    """Return *value* as text that is safe to embed in a record label."""
    return str(value).translate(_RECORD_ESCAPES)


def print_node(node, cnt, leafs, results):
    """Emit DOT statements for the parse tree *node* and its descendants.

    *node* is a list ``[label, child, ...]`` as produced by ``p_syn``.

    * If no child is a list, the node is a leaf: it is drawn as a record
      ``{label|text...}`` (one field per remaining token) and its id is
      appended to *leafs*.
    * Otherwise the node is drawn as a record with one port per child and an
      edge from each port to the child's node.  The first port is named
      ``pl`` and, when there are several children, the last one ``pr``;
      ports in between are ``p1``, ``p2``, ...  A bare-string child next to
      list children is drawn as a label-only leaf.

    Args:
        node: Nested list describing the subtree to draw.
        cnt: Numeric id for this node; it is drawn as ``n{cnt}``.
        leafs: List that receives the ids of all leaf nodes (mutated).
        results: List that receives the generated DOT lines (mutated).

    Returns:
        The highest node id used within this subtree, so the caller can
        number the next sibling ``returned + 1``.
    """
    label = _record_text(node[0])
    children = node[1:]

    if not any(isinstance(child, list) for child in children):
        leafs.append(f"n{cnt}")
        fields = "|".join([label, *map(_record_text, children)])
        results.append(f"\tn{cnt}" + '[label = "{' + fields + '}"];')
        return cnt

    # Keep the historical 'pl'/'pr' port names for unary and binary nodes so
    # their output is unchanged; only wider nodes get numbered middle ports.
    ports = [f"p{i}" for i in range(len(children))]
    ports[0] = "pl"
    if len(ports) > 1:
        ports[-1] = "pr"
    port_fields = "|".join(f"<{port}>" for port in ports)
    results.append(
        f"\tn{cnt}" + '[label = "{' + label + "|{" + port_fields + '}}"];'
    )

    parent = cnt
    for port, child in zip(ports, children):
        # The child takes the next free id, i.e. one past the last id used
        # by the previously drawn sibling subtree.
        results.append(f"\tn{parent}:{port} -> n{cnt + 1}")
        subtree = child if isinstance(child, list) else [child]
        cnt = print_node(subtree, cnt + 1, leafs, results)
    return cnt
def sentence2graph(idx, s):
    """Build a Graphviz source object for the constituency tree of *s*.

    The text form of ``s.constituency`` is tokenized and parsed into nested
    lists, converted to DOT record nodes, and wrapped in a digraph whose
    leaf nodes are all placed on the same rank.  The resulting source is
    named ``graph{idx}.gv`` and configured for PNG output.
    """
    tree = p_syn(p_lex(str(s.constituency)))

    leaf_ids = []
    body = []
    print_node(tree, 0, leaf_ids, body)

    header = "digraph graphname {\n\tnode[shape=record];"
    # Align every leaf horizontally, then close the digraph.
    footer = "\t{rank = same;" + "; ".join(leaf_ids) + "}\n}"
    dot_text = "\n".join([header, *body, footer])

    return gv.Source(dot_text, filename=f"graph{idx}.gv", format="png")
# %%
# Build the constituency-tree graph for the first sentence of the sample text.
# The returned graph object is not stored; presumably the cell runner
# displays it as the cell's result — confirm.
sentence2graph(0, doc.sentences[0])
# %%
# Same for the second sentence, under a different graph index.
sentence2graph(1, doc.sentences[1])
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment