# EnkrateiaLucca/chunking.py

## chunking.py
import nltk
import spacy
from nltk.tokenize import word_tokenize

# Demo script: tokenize one sentence, POS-tag it with NLTK, chunk the tagged
# tokens with a regular-expression grammar, and display the resulting tree.

# NOTE(review): `sp` is not referenced anywhere else in the visible script;
# the model load is kept unchanged in case other code relies on it.
sp = spacy.load("en_core_web_sm")

sentence = "Only accept truths that are absolutely evident"

# Tokenizing the sentence.
# NOTE(review): word_tokenize / pos_tag presumably require the NLTK tokenizer
# and tagger data packages to be installed locally - confirm on a fresh setup.
sentence_token = word_tokenize(sentence)

# Tagging the tokens of the sentence; the (token, tag) pairs are printed so
# the tags can be compared against the grammar below.
sentence_tagged = nltk.pos_tag(sentence_token)
print(sentence_tagged)

# Establishing a pattern to give to the chunker.
# Each line is "<LABEL>: {<tag pattern>}". Later rules such as DNP, DDNP and
# NPP refer to chunk labels (NP, DNP, PP) introduced by earlier rules, so the
# order of the lines matters and must be preserved.
# NOTE(review): tag names like ADJ_SIM, V_PRS, N_SING, DET, PRO are not Penn
# Treebank tags, which is what nltk.pos_tag emits by default for English.
# Rules that reference them presumably never match this sentence - confirm
# which tagset this grammar was written for.
pattern = r"""
                VP: {<ADJ_SIM><V_PRS>}
                VP: {<ADJ_INO><V.*>}
                VP: {<V_PRS><N_SING><V_SUB>}
                NP: {<N_SING><ADJ.*><N_SING>}
                NP: {<N.*><PRO>}
                VP: {<N_SING><V_.*>}
                VP: {<V.*>+}
                NP: {<ADJ.*>?<N.*>+ <ADJ.*>?}
                DNP: {<DET><NP>}
                PP: {<ADJ_CMPR><P>}
                PP: {<ADJ_SIM><P>}
                PP: {<P><N_SING>}
                PP: {<P>*}
                DDNP: {<NP><DNP>}
                NPP: {<PP><NP>+}
            """

# Chunking: build the parser straight from the grammar string and parse the
# tagged sentence once (the previous version parsed twice and discarded the
# first result).
chunker = nltk.RegexpParser(pattern)
Output = chunker.parse(sentence_tagged)

# Show the chunk tree (NLTK opens it in a graphical window).
Output.draw()