Blog 2019/9/1
<- previous | index | next ->
Trying to work out which patterns a regex-based lexer for Clojure would use.
It appears to work: https://regex101.com/r/Qs0gLH/4
Note: this doesn't include recognizing keywords.
see also:
Blog 2019/9/1
<- previous | index | next ->
Trying to work out which patterns a regex-based lexer for Clojure would use.
It appears to work: https://regex101.com/r/Qs0gLH/4
Note: this doesn't include recognizing keywords.
see also:
$ ./symbol-regex.py
([a-zA-Z\.\*+!\-_?$%&=<>][a-zA-Z0-9\.\*+!\-_?$%&=<>]+\/[a-zA-Z\.\*+!\-_?$%&=<>][a-zA-Z0-9\.\*+!\-_?$%&=<>]+)|([a-zA-Z\.\*+!\-_?$%&=<>][a-zA-Z0-9\.\*+!\-_?$%&=<>]+\/[a-zA-Z\.\*+!\-_?$%&=<>\/])|([a-zA-Z\.\*+!\-_?$%&=<>]\/[a-zA-Z\.\*+!\-_?$%&=<>][a-zA-Z0-9\.\*+!\-_?$%&=<>]+)|([a-zA-Z\.\*+!\-_?$%&=<>]\/[a-zA-Z\.\*+!\-_?$%&=<>\/])|([a-zA-Z\.\*+!\-_?$%&=<>][a-zA-Z0-9\.\*+!\-_?$%&=<>]+)|([a-zA-Z\.\*+!\-_?$%&=<>\/])
#!/usr/bin/env python
# a program to generate the regex for a symbol in clojure.
r"""
Print a regular expression that matches a Clojure symbol.

character classes:
- `a` (alpha): `[a-zA-Z]`
- `n` (num): `[0-9]`
- `s` special: `[\.\*+!\-_?$%&=<>]`
- `l` slash: `\/`
- `c` colon: `:` (listed for reference; the generated pattern does not use it)

a symbol can be:
- multi-char prefix, multi-char name: `[as][ans]+l[as][ans]+`
- multi-char prefix, single-char name: `[as][ans]+l[asl]`
- single-char prefix, multi-char name: `[as]l[as][ans]+`
- single-char prefix, single-char name: `[as]l[asl]`
- multi-char name: `[as][ans]+`
- single-char name: `[asl]`
"""

import sys

# character classes.
# These are raw strings: the backslashes are regex escapes that must reach the
# output verbatim, and sequences such as "\." or "\/" are invalid escapes in a
# normal Python string literal (they trigger a warning on modern interpreters).
a = r"a-zA-Z"
n = r"0-9"
s = r"\.\*+!\-_?$%&=<>"
l = r"\/"


def symbol_regex():
    """Return the symbol regex as a string (without a trailing newline).

    The result is six parenthesised alternatives joined by `|`, one per
    symbol form listed in the module docstring, in that same order.
    """
    # first character of a prefix or of a multi-char name: `[as]`
    first = "[" + a + s + "]"
    # one or more following characters (digits are allowed here): `[ans]+`
    rest = "[" + a + n + s + "]+"
    # a single-character name, which may also be a lone slash: `[asl]`
    single = "[" + a + s + l + "]"

    # The order is part of the output: the namespaced and multi-character
    # forms are listed before the shorter forms they start with.
    alternatives = [
        first + rest + l + first + rest,  # multi-char prefix, multi-char name
        first + rest + l + single,        # multi-char prefix, single-char name
        first + l + first + rest,         # single-char prefix, multi-char name
        first + l + single,               # single-char prefix, single-char name
        first + rest,                     # multi-char name
        single,                           # single-char name
    ]
    return "(" + ")|(".join(alternatives) + ")"


def main():
    """Write the symbol regex, followed by a newline, to stdout."""
    sys.stdout.write(symbol_regex() + "\n")


if __name__ == "__main__":
    main()