Skip to content

Instantly share code, notes, and snippets.

@vu3jej
Last active March 29, 2023 23:55
Show Gist options
  • Save vu3jej/a46eb3d18aa7d8c808af8b8ca4df06a4 to your computer and use it in GitHub Desktop.
Save vu3jej/a46eb3d18aa7d8c808af8b8ca4df06a4 to your computer and use it in GitHub Desktop.
Colour name extraction using spaCy
import spacy
class ColourExtractorStrict:
    """Extract colour names together with their neighbouring modifiers.

    A token counts as a colour when its lowercased text is one of the
    configured ``colours``. Each colour token is expanded with the
    contiguous run of tokens directly before and after it whose
    part-of-speech tag is listed in ``pos_ok``.
    """

    def __init__(self, colours, tagger=None):
        """Store the colour vocabulary and set up the tagger.

        Args:
            colours: Iterable of single-token colour names. Matching is
                case-insensitive because the input text is lowercased.
            tagger: Optional callable that maps a string to an iterable of
                tokens exposing ``text`` and ``pos_``. When omitted, a spaCy
                English pipeline is loaded.
        """
        self.colours = colours
        self.pos_ok = ['ADJ', 'NOUN']
        if tagger is None:
            tagger = self._load_default_tagger()
        self.tagger = tagger

    @staticmethod
    def _load_default_tagger():
        """Load the default English spaCy pipeline."""
        # spaCy v3 dropped shortcut links such as 'en', so try the full
        # package name first and keep the shortcut only as a fallback for
        # older installations.
        try:
            return spacy.load('en_core_web_sm')
        except (IOError, OSError):
            return spacy.load('en')

    def get(self, string):
        """Return the colour phrases found in ``string``.

        Args:
            string: Text to search. It is lowercased before tagging.

        Returns:
            A set of space-joined, lowercased phrases, or ``False`` when no
            colour token is present.
        """
        # Lowercase the vocabulary as well: the text is lowercased below, so
        # a colour supplied as e.g. 'Blue' would otherwise never match.
        # Build the set once so each token lookup is O(1).
        known = {colour.lower() for colour in self.colours}
        doc = self.tagger(string.lower())
        pairs = [(word.text, word.pos_) for word in doc]

        extracted = set()
        for index, (text, pos) in enumerate(pairs):
            if text not in known:
                continue
            # The helpers return False when there is nothing to add;
            # normalise that to an empty list so the phrase is built in
            # one expression.
            behind = self.look_behind(pairs=pairs, index=index,
                                      colour_pos=pos) or []
            ahead = self.look_ahead(pairs=pairs, index=index) or []
            extracted.add(' '.join(behind + [text] + ahead))
        return extracted if extracted else False

    def look_ahead(self, pairs, index):
        """Collect the accepted tokens immediately after ``pairs[index]``.

        Args:
            pairs: List of ``(text, pos)`` tuples.
            index: Position of the colour token in ``pairs``.

        Returns:
            The texts of the contiguous following tokens whose tag is in
            ``pos_ok``, in order, or ``False`` if there are none.
        """
        ahead = []
        for text, pos in pairs[index + 1:]:
            if pos not in self.pos_ok:
                break
            ahead.append(text)
        return ahead if ahead else False

    def look_behind(self, pairs, index, colour_pos):
        """Collect the accepted tokens immediately before ``pairs[index]``.

        Args:
            pairs: List of ``(text, pos)`` tuples.
            index: Position of the colour token in ``pairs``.
            colour_pos: Tag of the colour token. Currently unused; kept so
                existing callers keep working.

        Returns:
            The texts of the contiguous preceding tokens whose tag is in
            ``pos_ok``, in sentence order, or ``False`` if there are none.
        """
        behind = []
        # Walk backwards from the colour and stop at the first rejected tag.
        for text, pos in reversed(pairs[:index]):
            if pos not in self.pos_ok:
                break
            behind.append(text)
        # Tokens were gathered nearest-first; restore sentence order.
        return behind[::-1] if behind else False
from colour_extractor import ColourExtractorStrict
# Single-token colour names the extractor should recognise.
colours = [
    'blue',
    'pink',
    'lavender',
    'heather',
]

# Constructing the extractor loads the spaCy pipeline.
extractor = ColourExtractorStrict(colours=colours)

# Sample product description containing several multi-word colour phrases.
string = (
    'Available in a variety of colors, including bold blue heather, ebony, '
    'jazzberry pink heather, light steel, navy heather, new frosty lavender, '
    'plum port or slate heather'
)

# Evaluates to a set of colour phrases, or False when none are found.
extractor.get(string=string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment