Skip to content

Instantly share code, notes, and snippets.

@LeopoldTal
Last active October 3, 2020 18:32
Show Gist options
  • Save LeopoldTal/92e89ea3d01857b327b27a64fb37d00c to your computer and use it in GitHub Desktop.
Save LeopoldTal/92e89ea3d01857b327b27a64fb37d00c to your computer and use it in GitHub Desktop.
Match words inside phone numbers

This example coding interview asks:

On a phone keypad, each letter corresponds to a digit.

A phone keypad, where digit 2 corresponds to letters A, B, C

This means that phone numbers can contain English words, e.g. 1-(800)-356-9377 contains flowers.

Given a phone number, and a list of words, return the words contained in the phone number.

get_new_trie_node = lambda parent, parent_label, letter: {
'letter': letter,
'label': parent_label + letter,
'is_goal': False,
'children': {},
'parent': parent
}
def add_needle(sub_trie, needle):
if needle == '':
sub_trie['is_goal'] = True
return
letter = needle[0]
children = sub_trie['children']
if letter not in children:
children[letter] = get_new_trie_node(
sub_trie,
sub_trie['label'],
letter
)
add_needle(children[letter], needle[1:])
class MatcherTrie:
def __init__(self, needles):
self.trie = self.build_matcher_trie(needles)
def build_matcher_trie(self, needles):
trie = self.build_prefix_trie(needles)
self.add_failure_links(trie)
self.add_goal_sets(trie)
return trie
def build_prefix_trie(self, needles):
trie = get_new_trie_node(None, '', '')
for needle in needles:
add_needle(trie, needle)
return trie
def add_failure_links(self, trie_root):
trie_root['fail'] = trie_root
open_list = list(trie_root['children'].values())
while open_list:
node = open_list.pop(0)
letter = node['letter']
parent = node['parent']
if parent == trie_root:
node['fail'] = trie_root
else:
suffix_parent = parent['fail']
found = False
while not found:
if letter in suffix_parent['children']:
node['fail'] = suffix_parent['children'][letter]
found = True
elif suffix_parent == trie_root:
node['fail'] = trie_root
found = True
else:
suffix_parent = suffix_parent['fail']
open_list.extend(list(node['children'].values()))
def add_goal_sets(self, trie_root):
trie_root['goals'] = set()
open_list = [ trie_root ]
while open_list:
node = open_list.pop(0)
node['goals'] = node['fail']['goals'].copy()
if node['is_goal']:
node['goals'].add(node['label'])
open_list.extend(list(node['children'].values()))
def get_matches(self, haystack):
matching_needles = set()
trie_root = self.trie
sub_trie = trie_root
for letter in haystack:
done = False
while not done:
if letter in sub_trie['children']:
sub_trie = sub_trie['children'][letter]
done = True
elif sub_trie == trie_root:
done = True
else:
sub_trie = sub_trie['fail']
matching_needles.update(sub_trie['goals'])
return matching_needles
def multimatch(haystack, needles):
matcher_trie = MatcherTrie(needles)
return matcher_trie.get_matches(haystack)
from matcher import multimatch
def normalise_phone_number(raw_phone_number):
return ''.join(char for char in raw_phone_number if '0' <= char <= '9')
PHONE_PAD_DICT = {
'a': 2, 'b': 2, 'c': 2,
'd': 3, 'e': 3, 'f': 3,
'g': 4, 'h': 4, 'i': 4,
'j': 5, 'k': 5, 'l': 5,
'm': 6, 'n': 6, 'o': 6,
'p': 7, 'q': 7, 'r': 7, 's': 7,
't': 8, 'u': 8, 'v': 8,
'w': 9, 'x': 9, 'y': 9, 'z': 9
}
def letters_to_digits(word):
return ''.join(
str(PHONE_PAD_DICT[letter])
for letter in word.lower()
if letter in PHONE_PAD_DICT
)
def phone_pad_match(raw_phone_number, words_to_match):
phone_number = normalise_phone_number(raw_phone_number)
if not phone_number:
raise ValueError('Phone number is blank')
if not words_to_match:
raise ValueError('List of words to match is empty')
if not all(words_to_match):
raise ValueError('Word to match is blank')
words_by_digits = {}
for word in words_to_match:
as_digits = letters_to_digits(word)
if as_digits not in words_by_digits:
words_by_digits[as_digits] = []
words_by_digits[as_digits].append(word)
digit_substrings = multimatch(phone_number, words_by_digits.keys())
return set(
word
for match in digit_substrings
for word in words_by_digits[match]
)
import pytest
from phonepad import (
normalise_phone_number,
letters_to_digits,
multimatch,
phone_pad_match
)
test_phone_numbers = [
('3662277', '3662277'),
(' 253 6368 ', '2536368'),
('1-(800)-356-9377', '18003569377')
]
@pytest.mark.parametrize('raw_phone_number,expected', test_phone_numbers)
def test_normalise_phone_number(raw_phone_number, expected):
assert normalise_phone_number(raw_phone_number) == expected
test_words = [
('flowers', '3569377'),
('clement', '2536368'),
('clemdot', '2536368'),
('CLEM DOT!', '2536368'),
]
@pytest.mark.parametrize('word,expected', test_words)
def test_letters_to_digits(word, expected):
assert letters_to_digits(word) == expected
test_matches = [
('clement', [ 'a', 'b', 't', 'm'], ['t', 'm']),
('dog', ['d', 'do', 'dog', 'dot'], ['d', 'do', 'dog']),
('flowers', ['flower', 'lower', 'flour'], ['flower', 'lower']),
('flower', ['lowers', 'flower', 'lower', 'flour'], ['flower', 'lower']),
('flower', ['flowers', 'low', 'flow', 'flo'], ['low', 'flow', 'flo']),
('foobar', ['foo', 'bar'], ['foo', 'bar']),
('baz', ['bar'], []),
('otheater', ['theater', 'other'], ['theater']),
(
'abccab',
['a', 'ab', 'bab', 'bc', 'bca', 'c', 'caa'],
['a', 'ab', 'bc', 'c']
)
]
@pytest.mark.parametrize('haystack,needles,expected', test_matches)
def test_multimatch(haystack, needles, expected):
matched = multimatch(haystack, needles)
assert matched == set(expected)
test_needles = (
'flowers', 'clement', 'clemdot',
'foo', 'bar', 'baz', 'foobar', 'emo', 'cap', 'car', 'cat'
)
test_phone_pad_matches = [
('1-(800)-356-9377', ['flowers']),
('253 6368', ['clement', 'clemdot']),
('3662277', ['foo', 'bar', 'foobar', 'emo', 'cap', 'car'])
]
class TestPhonePadMatch:
@pytest.mark.parametrize('raw_phone_number,expected', test_phone_pad_matches)
def test_phone_pad_match(self, raw_phone_number, expected):
matched = phone_pad_match(raw_phone_number, test_needles)
assert matched == set(expected)
def test_empty_phone_number(self):
with pytest.raises(ValueError):
phone_pad_match('', ['foo'])
def test_no_needles(self):
with pytest.raises(ValueError):
phone_pad_match('1-(800)-356-9377', [])
def test_empty_needle(self):
with pytest.raises(ValueError):
phone_pad_match('1-(800)-356-9377', ['foo', '', 'bar'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment