Puzzle: possible decodings of a digit sequence

Blog 2019/9/18


For our puzzles guild at work this week, we chose this puzzle: https://www.geeksforgeeks.org/count-possible-decodings-given-digit-sequence/

I took the approach of using a regex-based tokenizer, as I've been using those a lot lately while working on lisp interpreters. Unlike a typical tokenizer, however, this one has been modified to produce all possible tokenizations rather than a single one.

The advantage of this approach is that you can change the token definitions without changing any of the code.
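A token definitions file is just pairs of lines: a token label, followed by the regex which matches that token. For example, here are the first two definitions from tokendefs1.txt (the full file is listed at the end of this post):

A
1
B
2

Here is alphaparse.py: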

#!/usr/bin/env python

import re

# An equivalent hard-coded token table would look like this:
# tokendefs = [
#     ("A", re.compile("1")),
#     ("B", re.compile("2")),
#     ("C", re.compile("3")),
#     ("D", re.compile("4")),
#     ("E", re.compile("5")),
#     ("F", re.compile("6")),
#     ("G", re.compile("7")),
#     ("H", re.compile("8")),
#     ("I", re.compile("9")),
#     ("J", re.compile("10")),
#     ("K", re.compile("11")),
#     ("L", re.compile("12")),
#     ("M", re.compile("13")),
#     ("N", re.compile("14")),
#     ("O", re.compile("15")),
#     ("P", re.compile("16")),
#     ("Q", re.compile("17")),
#     ("R", re.compile("18")),
#     ("S", re.compile("19")),
#     ("T", re.compile("20")),
#     ("U", re.compile("21")),
#     ("V", re.compile("22")),
#     ("W", re.compile("23")),
#     ("X", re.compile("24")),
#     ("Y", re.compile("25")),
#     ("Z", re.compile("26")),
# ]
# but instead we load it from a file of alternating label / regex lines.

def load_tokendefs(path):
    tokendefs = []
    with open(path) as fd:
        lines = [line.rstrip() for line in fd.readlines()]
    i = 0
    while i < len(lines):
        label = lines[i]
        regex = re.compile(lines[i+1])
        tokendefs.append((label, regex))
        i += 2
    return tokendefs

tokendefs = load_tokendefs("tokendefs.txt")

def branching_tokenize(text, offset=0):
    """Return all possible tokenizations."""
    # Find every token which can match at this offset.
    branches = []
    for label, regex in tokendefs:
        m = regex.match(text, offset)
        if m:
            matched_text = m.group(0)
            token = (label, matched_text)
            branch = [token]
            branches.append(branch)
    # Recursively extend each branch with the tokenizations of the
    # remaining input.
    deep_branches = []
    for branch in branches:
        token = branch[0]
        matched_text = token[1]
        new_offset = offset + len(matched_text)
        sub_branches = branching_tokenize(text, new_offset)
        if sub_branches == []:
            # Keep this branch only if it consumed the entire input;
            # otherwise the remaining text is untokenizable and the
            # branch is a dead end.
            if new_offset == len(text):
                deep_branches.append(branch)
        else:
            for sub_branch in sub_branches:
                deep_branches.append(branch + sub_branch)
    return deep_branches

def test(s, verbose=False):
    if verbose:
        print "input:", s
        paths = branching_tokenize(s)
        for path in paths:
            print "  output:", ''.join([token[0] for token in path])
            print "  tokens:", path
    else:
        print "%s:" % s, [''.join([token[0] for token in path]) for path in branching_tokenize(s)]

if __name__ == "__main__":
    inputs = ["1", "11", "111", "1111", "1234"]
    import sys
    if "--verbose" in sys.argv:
        if len(sys.argv) > 2:
            inputs = sys.argv[2:]
        for s in inputs:
            test(s, verbose=True)
            print
    else:
        if len(sys.argv) > 1:
            inputs = sys.argv[1:]
        for s in inputs:
            test(s)
            print
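The puzzle as posed only asks for the number of possible decodings, which falls out of this approach for free. A minimal sketch (count_decodings is a hypothetical helper, not part of the script above):

def count_decodings(s):
    # The puzzle's answer is just the number of complete tokenizations.
    return len(branching_tokenize(s))

# e.g. count_decodings("1111") == 5, matching the transcript below.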
$ ln -sf tokendefs1.txt tokendefs.txt
$ ./alphaparse.py
1: ['A']
11: ['AA', 'K']
111: ['AAA', 'AK', 'KA']
1111: ['AAAA', 'AAK', 'AKA', 'KAA', 'KK']
1234: ['ABCD', 'AWD', 'LCD']
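Note the counts above: 1, 2, 3, and 5 decodings for strings of one to four 1s, i.e. the Fibonacci sequence, since each 1 can either stand alone or pair with its neighbor as 11. This means the number of branches grows exponentially with input length. If you only need the count for long inputs, memoizing on the offset avoids materializing every branch. A sketch of that idea (count_tokenizations is hypothetical, not part of alphaparse.py):

def count_tokenizations(text, offset=0, memo=None):
    # Count-only variant of branching_tokenize: memoizing on offset means
    # shared suffixes are counted once rather than re-enumerated per branch.
    if memo is None:
        memo = {}
    if offset == len(text):
        return 1
    if offset not in memo:
        total = 0
        for label, regex in tokendefs:
            m = regex.match(text, offset)
            if m:
                total += count_tokenizations(text, offset + len(m.group(0)), memo)
        memo[offset] = total
    return memo[offset]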
$ ./alphaparse.py --verbose
input: 1
  output: A
  tokens: [('A', '1')]
input: 11
  output: AA
  tokens: [('A', '1'), ('A', '1')]
  output: K
  tokens: [('K', '11')]
input: 111
  output: AAA
  tokens: [('A', '1'), ('A', '1'), ('A', '1')]
  output: AK
  tokens: [('A', '1'), ('K', '11')]
  output: KA
  tokens: [('K', '11'), ('A', '1')]
input: 1111
  output: AAAA
  tokens: [('A', '1'), ('A', '1'), ('A', '1'), ('A', '1')]
  output: AAK
  tokens: [('A', '1'), ('A', '1'), ('K', '11')]
  output: AKA
  tokens: [('A', '1'), ('K', '11'), ('A', '1')]
  output: KAA
  tokens: [('K', '11'), ('A', '1'), ('A', '1')]
  output: KK
  tokens: [('K', '11'), ('K', '11')]
input: 1234
  output: ABCD
  tokens: [('A', '1'), ('B', '2'), ('C', '3'), ('D', '4')]
  output: AWD
  tokens: [('A', '1'), ('W', '23'), ('D', '4')]
  output: LCD
  tokens: [('L', '12'), ('C', '3'), ('D', '4')]
$ ln -sf tokendefs2.txt tokendefs.txt
$ ./alphaparse.py --verbose moo
input: moo
  output: MOO
  tokens: [('MOO', 'moo')]
  output: MOO
  tokens: [('M', 'm'), ('O', 'o'), ('O', 'o')]
  output: MOO
  tokens: [('M', 'm'), ('OO', 'oo')]
$ ./alphaparse.py --verbose moooo
input: moooo
  output: MOO
  tokens: [('MOO', 'moooo')]
  output: MOOOO
  tokens: [('M', 'm'), ('O', 'o'), ('O', 'o'), ('O', 'o'), ('O', 'o')]
  output: MOOOO
  tokens: [('M', 'm'), ('O', 'o'), ('O', 'o'), ('OO', 'oo')]
  output: MOOOO
  tokens: [('M', 'm'), ('O', 'o'), ('OO', 'oo'), ('O', 'o')]
  output: MOOOO
  tokens: [('M', 'm'), ('OO', 'oo'), ('O', 'o'), ('O', 'o')]
  output: MOOOO
  tokens: [('M', 'm'), ('OO', 'oo'), ('OO', 'oo')]
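One subtlety: each token definition contributes at most one branch per offset, because Python's re.match returns a single greedy match rather than every possible prefix match. That's why "mo*" consumed all of "moooo" as a single MOO token above, and never branched at just "m" or "mo":

import re

# re.match is greedy: "mo*" swallows every trailing 'o' in one match.
m = re.compile("mo*").match("moooo")
assert m.group(0) == "moooo"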
$ ln -sf tokendefs3.txt tokendefs.txt
$ ./alphaparse.py --verbose 4242
input: 4242
  output: FourTwoFourTwo
  tokens: [('Four', '4'), ('Two', '2'), ('Four', '4'), ('Two', '2')]
  output: FourTwoEureka!
  tokens: [('Four', '4'), ('Two', '2'), ('Eureka!', '42')]
  output: Eureka!FourTwo
  tokens: [('Eureka!', '42'), ('Four', '4'), ('Two', '2')]
  output: Eureka!Eureka!
  tokens: [('Eureka!', '42'), ('Eureka!', '42')]
tokendefs1.txt:

A
1
B
2
C
3
D
4
E
5
F
6
G
7
H
8
I
9
J
10
K
11
L
12
M
13
N
14
O
15
P
16
Q
17
R
18
S
19
T
20
U
21
V
22
W
23
X
24
Y
25
Z
26
tokendefs2.txt:

MOO
mo*
M
m
O
o
OO
oo
tokendefs3.txt:

Four
4
Two
2
Eureka!
42