Skip to content

Instantly share code, notes, and snippets.

@junjiah
Last active December 22, 2015 23:49
Show Gist options
  • Save junjiah/6549406 to your computer and use it in GitHub Desktop.
Save junjiah/6549406 to your computer and use it in GitHub Desktop.
Extract the Cause and Effect roles of the Causation frame from a FrameNet-annotated corpus
from lxml import etree as ET
from itertools import chain
import sys
# Corpus path: first command-line argument if given, otherwise the default.
if len(sys.argv) > 1:
    path = sys.argv[1]
else:
    path = 'x.xml'  # default xml file

# first, remove namespace: the default-namespace declaration is renamed to a
# harmless attribute so that the plain element names used in the lookups
# later in this script match.
#
# The file is read as bytes so the parser sees the document's own encoding
# declaration (lxml rejects already-decoded text that still carries one), and
# the `with` block closes the handle even if reading or parsing raises.
with open(path, 'rb') as f:
    tree = ET.XML(f.read().replace(b' xmlns=', b' xmlnamespace='))
# XPath for every <sentence> that owns an annotationSet whose frameName is
# "Causation" and which has an "FE" layer containing both a "Cause" label
# and an "Effect" label.  The fragments are joined with newlines (leading and
# trailing empty items keep the surrounding line breaks).
query = "\n".join((
    '',
    '//sentence[annotationSet[',
    '@frameName="Causation"]/layer[',
    '@name="FE" and',
    'label[@name="Cause"] and',
    'label[@name="Effect"]]]',
    '',
))
sens = tree.xpath(query)
def check_frame(x):
    """Return True when element *x* has frameName == "Causation"."""
    return x.get('frameName') == 'Causation'


# Report each matching sentence followed by its Cause / Effect spans.
# Every print call takes a single pre-formatted string, so the output is the
# same whether `print` is the Python 2 statement or the Python 3 function.
for sen in sens:
    text = sen.find('text').text
    print("%-7s : %s" % ("TEXT", text))
    # Only the sentence's direct children tagged with the Causation frame.
    for i, a in enumerate(filter(check_frame, sen)):
        print("--- %d ---" % i)
        causes = a.iterfind('layer[@name="FE"]/label[@name="Cause"]')
        effects = a.iterfind('layer[@name="FE"]/label[@name="Effect"]')
        # All Cause labels are listed before any Effect label.
        for item in chain(causes, effects):
            s, e = item.get('start'), item.get('end')
            # Labels lacking either offset attribute are skipped.
            if s and e:
                # The slice includes the character at index `end`.
                print("%-7s : %s" % (item.get('name').upper(),
                                     text[int(s):int(e) + 1]))
    # Blank separator line after each sentence.
    # NOTE(review): original nesting of this line was lost — confirm it is
    # per sentence rather than per annotation set.
    print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment