# Source: GitHub Gist by @itsmunim
# (gist id dcb497b63b0f85d698b1bc21b41476a5), last active August 12, 2016 17:40.
# def ngrams(tokens, n):
#     num_tokens = len(tokens)
#     if n > num_tokens:
#         return
#     ngram_list = []
#     for i in xrange(num_tokens):
#         token_groups = []
#         for j in xrange(i, i+n):
#             if j < num_tokens:
#                 token_groups.append(tokens[j])
#         ngram_list.append(token_groups)
#     return ngram_list
_map = {
('A'): 'X1',
('B'): 'X2',
('C'): 'X3',
('D'): 'X4',
('E'): 'X5',
('A', 'B'): 'Y1',
('B', 'C', 'D'): 'Y2',
('C', 'D', 'F'): 'Y3',
('C', 'D', 'G'): 'Y4'
}
def get_similar_sequence_keys(_map, sequence):
    """Return the keys of ``_map`` whose leading tokens equal ``sequence``.

    Keys and ``sequence`` are compared token by token. A tuple or list is
    read as a sequence of tokens; any other value (typically a bare string)
    counts as one single token. Comparing whole tokens, rather than the
    concatenated text, keeps token boundaries intact: the token ``'AB'`` is
    not a prefix of the key ``('A', 'B')``.

    Args:
        _map: Mapping (or other iterable of keys) whose keys are either a
            bare token or a tuple of tokens.
        sequence: A single token, or a list/tuple of tokens, to use as the
            prefix. An empty value (``''``, ``[]``, ``()``) matches every key.

    Returns:
        A list of the matching keys, in the iteration order of ``_map``.
    """
    def as_tokens(value):
        # Normalise "bare token" and "tuple/list of tokens" to one shape.
        if isinstance(value, (tuple, list)):
            return tuple(value)
        return (value,)

    # An empty prefix is, by definition, a prefix of every key.
    prefix = as_tokens(sequence) if sequence else ()
    width = len(prefix)
    return [key for key in _map if as_tokens(key)[:width] == prefix]
def run():
    """Read one line of tokens from standard input and translate it via ``_map``.

    The line is split into whitespace-separated tokens. Starting at the first
    token, the longest key of ``_map`` that exactly matches the upcoming
    tokens is replaced by its mapped value and the position advances past the
    matched tokens. Translation stops at the first position where no key
    matches; whatever was translated up to that point is returned.

    Returns:
        A list of the mapped values, in input order.
    """
    import re

    # ``raw_input`` only exists on Python 2; use ``input`` on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    token_regex = re.compile(r'\S+')
    tokens = token_regex.findall(read_line().strip())

    def token_count(key):
        # One-token keys are stored as bare strings, longer ones as tuples,
        # so len() of a string key would count characters, not tokens.
        return len(key) if isinstance(key, tuple) else 1

    output = []
    index = 0
    while index < len(tokens):
        # Try the longest candidates first so the match is greedy.
        candidates = sorted(get_similar_sequence_keys(_map, tokens[index]),
                            key=token_count, reverse=True)
        for sequence_key in candidates:
            last_index_limit = index + token_count(sequence_key)
            window = tokens[index:last_index_limit]
            # Mirror the key convention of ``_map``: bare token or tuple.
            window_key = window[0] if len(window) == 1 else tuple(window)
            if window_key == sequence_key:
                output.append(_map[sequence_key])
                index = last_index_limit
                break
        else:
            # Nothing matched here (or there were no candidates at all):
            # stop instead of retrying the same index forever.
            break
    return output
# Script entry point: translate one line from standard input and print the
# resulting list.
if __name__ == "__main__":
    output = run()
    # Parenthesised form prints the same on Python 2 and is valid Python 3.
    print(output)
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")