Skip to content

Instantly share code, notes, and snippets.

@dmesquita
Created March 26, 2017 20:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmesquita/06af52be57d292a18b1db341a6a7eb3d to your computer and use it in GitHub Desktop.
Save dmesquita/06af52be57d292a18b1db341a6a7eb3d to your computer and use it in GitHub Desktop.

spaCy tour

by Déborah Mesquita

import spacy
# Load the English pipeline and run it over a short sample passage.
nlp = spacy.load('en')
raw_text = (
    "Seven years after the death of his wife, Mill was invited to contest "
    "Westminster. His feeling on the conduct of elections made him refuse "
    "to take any personal action in the matter, and he gave the frankest "
    "expression to his political views, but nevertheless he was elected by "
    "a large majority. He was not a conventional success in the House; as "
    "a speaker he lacked magnetism. But his influence was widely felt."
)
parsedData = nlp(raw_text)

# Look at the first token: its original text and its lower-cased form.
word = parsedData[0]
print(word.text, word.lower_)
Seven seven
# Show the coarse (pos_) and fine-grained (tag_) part-of-speech tags for the
# first seven tokens of the parsed text.
for word in parsedData[:7]:
    print(word.text, word.pos_, word.tag_)
    
Seven NUM CD
years NOUN NNS
after ADP IN
the DET DT
death NOUN NN
of ADP IN
his ADJ PRP$
word = parsedData[10]  # the word 'was'
# Print the token text followed by its coarse and fine-grained POS tags.
for label, value in (
    ("original:", word.text),
    ("POS tag:", word.pos_),
    ("fine grainned POS tag:", word.tag_),
):
    print(label, value)
# VBD means: VerbForm=fin Tense=past
original: was
POS tag: VERB
fine grainned POS tag: VBD
# List only the tokens that carry a named-entity type, together with their
# entity label and part-of-speech tags.
for word in parsedData:
    if not word.ent_type_:
        continue  # token is not part of any recognised entity
    print(word.text, word.ent_type_, word.pos_, word.tag_)
Seven DATE NUM CD
years DATE NOUN NNS
Mill PERSON PROPN NNP
Westminster PERSON PROPN NNP
House ORG PROPN NNP
# Print every token alongside its syntactic dependency label (`dep_`).
for word in parsedData:
    print(word.text, word.dep_)
Seven nummod
years nsubjpass
after prep
the det
death pobj
of prep
his poss
wife pobj
, punct
Mill appos
was auxpass
invited ROOT
to aux
contest xcomp
Westminster dobj
. punct
His poss
feeling nsubj
on prep
the det
conduct pobj
of prep
elections pobj
made ROOT
him dobj
refuse ccomp
to aux
take xcomp
any det
personal amod
action dobj
in prep
the det
matter pobj
, punct
and cc
he nsubj
gave conj
the det
frankest amod
expression dobj
to prep
his poss
political amod
views pobj
, punct
but cc
nevertheless advmod
he nsubjpass
was auxpass
elected conj
by agent
a det
large amod
majority pobj
. punct
He nsubj
was ROOT
not neg
a det
conventional amod
success attr
in prep
the det
House pobj
; punct
as prep
a det
speaker pobj
he nsubj
lacked relcl
magnetism dobj
. punct
But cc
his poss
influence nsubjpass
was auxpass
widely advmod
felt ROOT
. punct
from spacy.attrs import DEP
from spacy.matcher import Matcher
# Build a matcher with one single-token pattern: any token whose dependency
# label is 'nsubj' (nominal subject).
matcher = Matcher(nlp.vocab)
# http://universaldependencies.org/en/dep/nsubj.html
matcher.add_pattern("SubObjDireto", [ {DEP:'nsubj'}])

doc = nlp(raw_text)
# Run the matcher once and reuse the result instead of matching a second time
# in the loop header.
matches = matcher(doc)

# Each match is (ent_id, label, start, end); only the token span is printed.
for _ent_id, _label, start, end in matches:
    print(doc[start:end].text)
feeling
he
He
he
# Unpack one name per token of a short phrase, then print the similarity
# score for each pair of animal tokens.
my, dog, and_, cat, and__, horse = nlp(u'my dog and cat and horse')
for left, right in ((cat, dog), (cat, horse), (dog, horse)):
    print(left.similarity(right))
0.801685428714
0.484733507195
0.624627638895
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment