Skip to content

Instantly share code, notes, and snippets.

@vanatteveldt
Created January 31, 2014 21:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vanatteveldt/8743409 to your computer and use it in GitHub Desktop.
Save vanatteveldt/8743409 to your computer and use it in GitHub Desktop.
from amcatclient.api import AmcatAPI
from amcat.nlp.naf import NAF_Article
from amcat.nlp.syntaxtree import _naf_to_rdf, SyntaxTree
from amcat.tools.pysoh.pysoh import SOHServer
from collections import defaultdict
import re
# Id of the article to process.
aid = 3958

# NOTE(review): the API credentials are hard-coded in source -- consider
# loading them from the environment or a configuration file instead.
api = AmcatAPI("http://amcat.vu.nl", "wva", "geheim!")
soh = SOHServer("http://localhost:3030/x")

# Fetch ruleset 5, its rules, and the labels filtered by the ruleset's
# 'lexicon_codebook' value.
ruleset = api.request("rulesets/5", page_size=999)
rules = api.request(
    "rulesets/5/rules",
    page_size=999
)['results']
labels = api.request(
    "label",
    code__codebook_codes__codebook__id=ruleset['lexicon_codebook'],
    page_size=9999
)['results']
# Group the label texts per code: {code id: {language id: label text}}.
codes = defaultdict(dict)
for label in labels:
    codes[int(label['code'])][int(label['language'])] = label['label']

# Build the lexicon: {lemma : {lexclass, ... }}
lexicon = defaultdict(set)
lexlang = int(ruleset['lexicon_language'])
# Raw string so that "\w" is passed to the regex engine untouched instead of
# being treated as a string escape; compiled once since it is loop-invariant.
lemma_pattern = re.compile(r"[\w*]+")
for code_labels in codes.values():
    # Codes without a label in the lexicon language contribute no lemmata.
    if lexlang not in code_labels:
        continue
    # The lexical class name is the label stored under the lowest language id.
    lexclass = code_labels[min(code_labels)]
    # The label in the lexicon language is split into word/wildcard tokens,
    # each of which is registered as a lemma for this lexical class.
    for lemma in set(lemma_pattern.findall(code_labels[lexlang])):
        lexicon[lemma].add(lexclass)
# Request the 'parse_alpino' result for the article and load it as a NAF
# article object.
naf_url = "projects/49/articlesets/4635/articles/{aid}/xtas/parse_alpino/".format(aid=aid)
naf = api.request(naf_url)['results']
naf = NAF_Article.from_dict(naf)

# Convert the NAF article to RDF and wrap it in a syntax tree backed by the
# SOH server. NOTE(review): the meaning of the second argument (73) is not
# visible from this script -- confirm against _naf_to_rdf.
rdf = _naf_to_rdf(naf, 73)
t = SyntaxTree(soh, rdf)

# Apply the lemma -> lexical class mapping to the tree.
t.apply_lexicon(lexicon)
# Apply the rules to the tree in ascending numeric 'order'.
rules = sorted(rules, key=lambda r: int(r['order']))
for rule in rules:
    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print("Applying rule {id}: {label}".format(**rule))
    t.apply_rule(rule['where'], rule['insert'], rule['remove'])

# Render the resulting tree and write it to an HTML file; the context manager
# guarantees the file is flushed and closed even if the write fails.
g = t.visualise()
with open("/tmp/bla.html", "w") as html_file:
    html_file.write(g.getHTMLObject())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment