Skip to content

Instantly share code, notes, and snippets.

@mohd-akram
Last active September 26, 2020 18:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mohd-akram/e0101f7d66cdaabbb60dbc6d5e48e58a to your computer and use it in GitHub Desktop.
Save mohd-akram/e0101f7d66cdaabbb60dbc6d5e48e58a to your computer and use it in GitHub Desktop.
A script to convert an ERE (extended regular expression) to one or more BREs
#!/usr/bin/env awk -f
# usage: ere2bre <ere>
function bracket( \
pattern, i, \
c, c2, term, len, end \
) {
++i # skip opening bracket
if (substr(pattern, i, 1) == "]") ++i # skip right bracket
len = length(pattern)
for (; i <= len; i++) {
c = substr(pattern, i, 1)
c2 = substr(pattern, i, 2)
if (c == "\\") ++i
else if (c2 == term) { term = ""; ++i }
else if (c2 == "[:" && !term) { term = ":]"; ++i }
else if (c2 == "[." && !term) { term = ".]"; ++i }
else if (c2 == "[=" && !term) { term = "=]"; ++i }
else if (c == "]" && !term) { end = i; break }
}
if (!end) {
print "Unbalanced brackets" > "/dev/stderr"
exit 1
}
return end
}
function groups( \
pattern, out, \
idx, start, len, i, c, depth \
) {
start = 1
len = length(pattern)
for (i = 1; i <= len; i++) {
c = substr(pattern, i, 1)
if (c == "\\") ++i
else if (c == "[") i = bracket(pattern, i)
else if (c == "(") {
if (!depth) {
out[++idx] = substr(pattern, start, i-start)
start = i
}
++depth
}
else if (c == ")") {
--depth
if (!depth) {
out[++idx] = substr(pattern, start, i-start+1)
start = i+1
}
}
}
out[++idx] = substr(pattern, start)
return idx
}
function alts( \
pattern, out, \
idx, start, len, i, c, depth \
) {
start = 1
len = length(pattern)
for (i = 1; i <= len; i++) {
c = substr(pattern, i, 1)
if (c == "\\") ++i
else if (c == "[") i = bracket(pattern, i)
else if (c == "(") ++depth
else if (c == ")") --depth
else if (c == "|" && !depth) {
out[++idx] = substr(pattern, start, i-start)
start = i+1
}
}
if (depth) {
print "Unbalanced parentheses" > "/dev/stderr"
exit 1
}
out[++idx] = substr(pattern, start)
return idx
}
function replace( \
pattern, \
new, len, i, c, end \
) {
len = length(pattern)
for (i = 1; i <= len; i++) {
c = substr(pattern, i, 1)
if (c == "\\") {
++i
c = substr(pattern, i, 1)
if ( \
c != "?" && c != "+" && c != "|" && \
c != "{" && c != "}" && c != "(" && c != ")" \
)
c = "\\" c
}
else if (c == "[") {
end = bracket(pattern, i)
c = substr(pattern, i, end-i+1)
i = end
}
else if (c == "?") c = "\\{0,1\\}"
else if (c == "+") c = "\\{1,\\}"
else if (c == "{") c = "\\{"
else if (c == "}") c = "\\}"
else if (c == "(") c = "\\("
else if (c == ")") c = "\\)"
new = new c
}
return new
}
function permutate( \
opts, lens, out, oidx, \
res, idx, len, i, s \
) {
len = lens[0]
if (!idx) idx = 1
if (idx > len) {
for (i = 1; i <= len; i++)
s = s res[i]
out[++oidx] = s
return oidx
}
for (i = 1; i <= lens[idx]; i++) {
res[idx] = opts[idx,i]
oidx = permutate(opts, lens, out, oidx, res, idx+1)
}
return oidx
}
function flatten( \
pattern, out, \
parts, idx, len, gs, gslen, wrap, ps, pslen, opts, lens, i, j, k \
) {
len = alts(pattern, parts)
for (i = 1; i <= len; i++) {
gslen = groups(parts[i], gs)
if (gslen == 1) {
out[++idx] = gs[1]
continue
}
for (j = 1; j <= gslen; j++) {
wrap = gs[j] ~ /^\(/
if (wrap)
gsub(/^\(|\)$/, "", gs[j])
pslen = flatten(gs[j], ps)
for (k = 1; k <= pslen; k++)
opts[j,k] = wrap ? "(" ps[k] ")" : ps[k]
lens[j] = k-1
}
lens[0] = j-1
idx = permutate(opts, lens, out, idx)
}
return idx
}
BEGIN {
len = flatten(ARGV[1], res)
while (++i <= len) print replace(res[i])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment