Last active
June 30, 2016 10:31
-
-
Save myedibleenso/0c6a5c99070b506992c6343375cd7ebd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This defines how our entities and events are related to one another semantically | |
taxonomy: | |
- ExpandedEntity: | |
- Entity: | |
- Nominal: | |
- Noun | |
- PossiblePerson: | |
- Person | |
- Organization | |
- Location | |
- Date | |
- Unknown | |
- Verb | |
- Subject | |
- Object | |
- Triple: | |
- TripleWithProperty: | |
- HasDate | |
- HasLocation | |
- HasAssociation | |
# here are our rules | |
# these are just meant to serve as an example | |
# there are many ways they could be improved | |
rules: | |
- name: "ner-unknown" | |
label: Unknown | |
priority: 1 | |
# a token pattern describes a sequence of tokens | |
type: token | |
pattern: | | |
# inspect the sequence of named entity labels | |
# produced by the named entity recognizer | |
[entity="MISC"]+ | |
- name: "ner-person" | |
label: Person | |
priority: 1 | |
type: token | |
pattern: | | |
[entity="PERSON"]+ | |
- name: "possible-person" | |
label: PossiblePerson | |
priority: 1 | |
type: token | |
pattern: | | |
[tag=/^N/]* [tag=/^N/ & outgoing="cop"] [tag=/^N/]* | |
- name: "ner-location" | |
label: Location | |
priority: 1 | |
type: token | |
pattern: | | |
[entity="LOCATION"]+ | |
- name: "ner-org" | |
label: Organization | |
priority: 1 | |
type: token | |
pattern: | | |
[entity="ORGANIZATION"]+ | |
- name: "ner-date" | |
label: Date | |
priority: 1 | |
type: token | |
pattern: | | |
[entity="DATE"]+ | |
- name: subj | |
label: Subject | |
priority: 1 | |
type: token | |
# instead of assuming the tokens are words, | |
# we can assume they describe PoS tags | |
# by changing the value of unit to "tag" | |
unit: "tag" | |
pattern: | | |
/^N/* [incoming=/^nsubj/] /^N/* | |
- name: obj | |
label: Object | |
priority: 1 | |
type: token | |
unit: "tag" | |
pattern: | | |
/^N/* [incoming=/obj/] /^N/* | |
- name: noun | |
label: Noun | |
priority: 1 | |
type: token | |
pattern: | | |
# inspect the PoS tags | |
# find a seq of 1 or more nouns | |
# the tokens should not already be part of an Entity mention | |
[tag=/^N/ & !mention=/./]+ | |
- name: verb | |
label: Verb | |
priority: 1 | |
type: token | |
unit: "tag" | |
pattern: | | |
# inspect the PoS tags | |
# find a seq of 1 or more verbs | |
/^V/+ | |
- name: "expanded-entity" | |
label: ExpandedEntity | |
example: "Democratic primary" | |
priority: 2 | |
type: token | |
pattern: | | |
# some Nominal mention (any label under Nominal in the taxonomy) | |
# optionally preceded and/or followed by nouns or adjectives | |
[tag=/^(J|N)/]* @Nominal [tag=/^(J|N)/]* | |
# | |
# Rules for filling triples | |
# | |
- name: "triples-1" | |
label: Triple | |
priority: 10 | |
example: "HE then DEFEATED Republican NOMINEE JOHN MCCAIN in the general election" | |
# this pattern describes a syntactic configuration as a traversal over a | |
# syntactic dependency graph | |
# the traversal begins at some trigger (a predicate) | |
type: dependency | |
pattern: | | |
trigger = @Verb | |
# traversal is in relation to the trigger | |
# NOTE: outgoing relation is optionally prefixed with > | |
# named arguments have an associated type | |
# this is a semantic constraint on the final node (token) in the traversal | |
subject:ExpandedEntity = (<xcomp|<cop)? (nsubj|nsubjpass) | |
object:ExpandedEntity = (dobj|<cop) | |
- name: "triples-copula" | |
label: Triple | |
example: "HE WAS a US SENATOR" | |
priority: 10 | |
type: dependency | |
pattern: | | |
trigger = @Verb | |
# a cop represents the "be" verb (is, was, am, are, etc.) | |
subject:ExpandedEntity = <cop /^nsubj/ | |
object:ExpandedEntity = <cop | |
- name: "triples-against" | |
label: Triple | |
example: "HE RAN in 2012 against X" | |
priority: 10 | |
type: dependency | |
pattern: | | |
trigger = @Verb | |
subject:ExpandedEntity = nsubj | |
object:ExpandedEntity = prep_in prep_against | |
- name: "triples-passive" | |
label: Triple | |
example: "HE then defeated Republican nominee John McCain in the general election, and WAS INAUGURATED as PRESIDENT" | |
priority: 10 | |
type: dependency | |
pattern: | | |
trigger = @Verb | |
# subject of a passive construction | |
subject:ExpandedEntity = nsubjpass | |
# some preposition, and end on an Entity that isn't a Date | |
object:ExpandedEntity = /^prep_/ [!mention=Date] | |
# | |
# Rules for associating properties with triples | |
# | |
# We can try to find a location for some triple | |
- name: "occurred-at" | |
label: HasLocation | |
priority: 20 | |
example: "EVENT in Chicago" | |
type: dependency | |
pattern: | | |
triple:Triple | |
location:Location = /^prep_/+ | |
# We can try to find a date for some triple | |
- name: "occurred-when" | |
label: HasDate | |
priority: 20 | |
example: "EVENT sometime between 2014 and 2016" | |
type: dependency | |
pattern: | | |
triple:Triple | |
date:Date = /^prep_/+ | |
# Look for associations with organizations | |
- name: "is-associated" | |
label: HasAssociation | |
priority: 20 | |
example: "A is a B of C" | |
type: dependency | |
pattern: | | |
triple:Triple | |
organization:Organization = /^prep_/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment