Instantly share code, notes, and snippets.

@funktor /age_range_features.py Secret
Last active Oct 14, 2018

Embed
What would you like to do?
def get_word_features(word, word_id):
    """Build the string features describing a single token.

    Each feature is formatted as ``<word_id>.<feature>=<value>`` (the first
    one is ``<word_id>=<lower-cased word>``) so the same token can be
    described under different prefixes.

    Args:
        word: The token text. An empty string is accepted.
        word_id: Prefix that labels every feature produced for this token.

    Returns:
        A list of nine feature strings, always in the same order.
    """
    word_lower = word.lower()

    age_group_words = ('kid', 'child', 'newborn', 'adult', 'teen', 'toddler',
                       'tween', 'baby', 'infant', 'senior')
    number_words = ('one', 'two', 'three', 'four', 'five', 'six', 'seven',
                    'eight', 'nine', 'ten')

    # re.match anchors at the start of the token only, so these two patterns
    # also accept tokens that merely *begin* with a number / range followed
    # by a non-word character (e.g. '12+' counts as is_number).
    starts_with_number = bool(re.match(r'\b\d+\b', word_lower))
    starts_with_range = bool(re.match(r'\b[0-9]+-[0-9]+\b', word_lower))

    return [
        word_id + '=' + word_lower,
        word_id + '.is_digit=%s' % word_lower.isdigit(),
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        word_id + '.has_digit=%s' % bool(re.search(r'\d+', word_lower)),
        word_id + '.is_number=%s' % starts_with_number,
        # endswith() instead of word_lower[-1] so an empty token yields False
        # rather than raising IndexError.
        word_id + '.has_plus=%s' % word_lower.endswith('+'),
        word_id + '.is_range=%s' % starts_with_range,
        word_id + '.is_age=%s' % word_lower.startswith('age'),
        word_id + '.is_age_group=%s' % (word_lower in age_group_words),
        word_id + '.is_numeric=%s' % (word_lower in number_words),
    ]
def word2features(sentence, pos):
    """Return the features for the token at ``pos`` and its two neighbours.

    The token's own features come first (prefix ``curr_word``), followed by
    the previous token's features (prefix ``prev_word``) or the marker
    ``'BOS'`` when there is no previous token, and finally the next token's
    features (prefix ``next_word``) or the marker ``'EOS'`` when there is no
    next token.

    Args:
        sentence: Indexable sequence of token strings.
        pos: Index of the token to describe.

    Returns:
        A flat list of feature strings.
    """
    collected = get_word_features(sentence[pos], 'curr_word')

    # Left context, or the beginning-of-sentence marker.
    if pos <= 0:
        collected.append('BOS')
    else:
        collected.extend(get_word_features(sentence[pos - 1], 'prev_word'))

    # Right context, or the end-of-sentence marker.
    if pos >= len(sentence) - 1:
        collected.append('EOS')
    else:
        collected.extend(get_word_features(sentence[pos + 1], 'next_word'))

    return collected
def sent2features(sent):
    """Return one feature list per token of ``sent``, in token order.

    Args:
        sent: Sequence of token strings.

    Returns:
        A list with ``len(sent)`` entries; entry ``i`` is the result of
        ``word2features(sent, i)``.
    """
    per_token = []
    for position in range(len(sent)):
        per_token.append(word2features(sent, position))
    return per_token
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment