Created
August 14, 2020 09:14
-
-
Save akash-ch2812/eced44d8e8a4140c1ddc1e90a4e49405 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spacy.matcher import PhraseMatcher | |
from scipy import spatial | |
# Search a parsed document for a keyword and collect the sentences that contain it.
def search_for_keyword(keyword, doc_obj, nlp):
    """Return the text of every sentence in ``doc_obj`` that contains ``keyword``.

    Args:
        keyword: Phrase to look for; it is run through ``nlp`` to build the
            pattern handed to the phrase matcher.
        doc_obj: Parsed document to search. ``span.sent`` is read on each
            match, so the document presumably needs sentence boundaries set
            (e.g. by a parser or sentencizer) -- confirm against the caller.
        nlp: Pipeline object; it is called on ``keyword`` and its ``vocab``
            is used to construct the matcher.

    Returns:
        A list with one sentence string per match, in the order the matcher
        reports them. A sentence that contains the keyword more than once
        appears once per occurrence.
    """
    phrase_matcher = PhraseMatcher(nlp.vocab)
    # Legacy positional form (key, on_match, *docs) kept on purpose so the
    # call keeps working with the spaCy release this file was written for.
    phrase_matcher.add("Text Extractor", None, nlp(keyword))

    # Each match is (match_id, start, end); only the token offsets are needed
    # to recover the enclosing sentence.
    return [
        doc_obj[start:end].sent.text
        for _match_id, start, end in phrase_matcher(doc_obj)
    ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment