Last active
March 10, 2022 15:28
-
-
Save gibson042/4f40d357f66bb99c8777181ab2beb2bc to your computer and use it in GitHub Desktop.
highlight.awk - ANSI-format matching input (e.g. to supplement grep)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f

# Build the global USAGE text: a usage line and an example line, each with
# the invoking command name substituted for its %s placeholder.
BEGIN {
    # ENVIRON["_"] is presumably the command path exported by the invoking
    # shell (verify for your shell); when it is unset or otherwise false the
    # literal placeholder "$0" is shown instead.
    CMD = ENVIRON["_"];
    if (!CMD) {
        CMD = "$0";
    }
    USAGE = "Usage: %s rule=<awk ERE pattern>/<ANSI SGR format>[.<extra format>]... [rule=...]...";
    EXAMPLE = "Example: </etc/resolv.conf %s rule='#.*'/dim rule='([0-9]+[.]){3}[0-9]+'/bold.red";
    # From here on USAGE holds the finished two-line message, not the template.
    USAGE = sprintf(USAGE, CMD) "\n" sprintf(EXAMPLE, CMD);
}
# Report a usage problem on stderr and terminate the program.
#   msg   optional error text; when non-empty it is printed first with an
#         "[ERROR] " prefix.
#   code  optional exit status; when it is zero/empty, 64 is used.
function usage(msg, code) {
    if (msg != "") {
        print "[ERROR] " msg > "/dev/stderr";
    }
    print USAGE > "/dev/stderr";
    # 64 is EX_USAGE, cf. http://man.openbsd.org/sysexits
    exit (code ? code : 64);
}
# Process arguments into the global configuration read by the main rule:
#   PATTERNS[n]      the rule's ERE as given, used while nothing has been
#                    consumed from the current line yet;
#   PATTERNS_MID[n]  the same ERE with every start anchor `^` (outside bracket
#                    expressions and escapes) replaced by `$`, used on the
#                    remainder of a line after an earlier match;
#   FORMATS[n]       the concatenated SGR sequences of the rule's keywords;
# for n in 1..N. Any malformed argument ends the program through usage().
BEGIN {
    init_ansi();
    N = 0;
    rule_prefix = "rule=";
    prefix_len = length(rule_prefix);
    for (i = 1; i < ARGC; i++) {
        arg = ARGV[i];
        # Skip an argument that identifies a file (i.e., doesn't match keyword=value syntax).
        if (!match(arg, "^[a-zA-Z_][[:alnum:]_]*=")) continue;
        # Only rule=... assignments are understood.
        if (substr(arg, 1, prefix_len) != rule_prefix) usage("unknown argument: " arg);

        # The LAST "/" separates the pattern from the format keywords, so the
        # pattern itself may contain slashes.
        for (j = length(arg); j > 0; j--) if (substr(arg, j, 1) == "/") break;
        # Reject both a missing separator and an empty keyword list ("rule=x/"),
        # which would otherwise register a rule with no formatting at all.
        if (j == 0 || j == length(arg)) usage("missing keywords: " arg);

        N++;
        pattern = substr(arg, prefix_len + 1, j - prefix_len - 1);
        PATTERNS[N] = pattern;

        # Walk the pattern token by token. Bracket expressions and backslash
        # escapes are copied verbatim; a bare `^` becomes `$`, which is meant to
        # keep that alternative from matching at the start of a line remainder.
        # The bracket alternative stops at the bracket's own closing `]`
        # (allowing a leading `^`, a leading literal `]`, escapes and
        # [:class:] items) so that a `^` located between two bracket
        # expressions is still rewritten.
        mid = "";
        while (match(pattern, /[[]\^?[]]?([\\].|[[]:[a-z]+:[]]|[^]\\])*[]]|[\\].|\^/)) {
            # Only the bare `^` alternative can produce a one-character match.
            token = (RLENGTH > 1) ? substr(pattern, RSTART, RLENGTH) : "$";
            mid = mid substr(pattern, 1, RSTART - 1) token;
            pattern = substr(pattern, RSTART + RLENGTH);
        }
        PATTERNS_MID[N] = mid pattern;

        # Translate the "."-separated keywords into one SGR prefix, preserving
        # their order; the first unknown keyword aborts with the valid list.
        FORMATS[N] = "";
        format_count = split(substr(arg, j + 1), formats, ".");
        for (k = 1; k <= format_count; k++) {
            if (!(formats[k] in ANSI)) {
                err_template = "unknown format: %s\nvalid formats: %s";
                usage(sprintf(err_template, formats[k], ansi_keywords()));
            }
            FORMATS[N] = FORMATS[N] ANSI[formats[k]];
        }

        # Blank the consumed argument so awk does not treat it as an operand.
        ARGV[i] = "";
    }
    # Require at least one formatting argument
    # so absence leads to iterative education.
    if (N == 0) usage(sprintf("missing argument %s...", rule_prefix));
}
# Highlight one input record: repeatedly locate the earliest non-empty match
# among all rules (the lowest-numbered rule wins a tie), emit the text before
# it unchanged, then the match wrapped in that rule's format and ANSI["OFF"].
# Whatever is left once no rule matches is printed as-is.
{
    rest = $0;
    done = "";
    hits = 0;
    while (rest != "") {
        rest_len = length(rest);
        # One past the end means "nothing found yet".
        best_at = rest_len + 1;
        for (r = 1; r <= N; r++) {
            # After the first consumed match `rest` no longer begins at the
            # start of the line, so switch to the de-anchored patterns.
            re = hits ? PATTERNS_MID[r] : PATTERNS[r];
            if (!match(rest, re)) continue;
            # Empty matches are ignored; only a strictly earlier start replaces
            # the current candidate.
            if (RLENGTH > 0 && RSTART < best_at) {
                best_at = RSTART;
                best_len = RLENGTH;
                best_fmt = FORMATS[r];
            }
        }
        # No rule matched the remainder: this record is finished.
        if (best_at > rest_len) break;
        # Copy the unmatched lead-in, then the formatted match, and advance.
        done = done slicestr(rest, 1, best_at - 1) best_fmt slicestr(rest, best_at, best_len) ANSI["OFF"];
        rest = slicestr(rest, best_at + best_len);
        hits++;
    }
    print done rest;
}
# Define a global `ANSI` containing SGR control sequences for use in configuring highlight rules.
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR
#
# Keys are "OFF" (SGR 0), the attributes bold/dim/italic/underline/invert and
# the eight colours black..white. Each colour additionally gets a "<name>bg"
# entry (code + 10), a bright variant (code + 60) named "bright<name>" (or
# "gray" for black), and that variant's "...bg" entry (code + 70).
#
# Takes no arguments: every parameter from `__` onward is a local variable,
# including the scratch array `sgr` so that it does not leak into globals.
function init_ansi(__, CODES, n, kv, k, v, sgr) {
    CODES = "bold=1 dim=2 italic=3 underline=4 invert=7";
    CODES = CODES " black=30 red=31 green=32 yellow=33 blue=34 magenta=35 cyan=36 white=37";
    n = split("OFF=0 " CODES, sgr, " ");
    for (; n >= 1; n--) {
        split(sgr[n], kv, "=");
        k = kv[1];
        v = kv[2];
        # ESC is written as the octal escape "\033": hex escapes such as
        # "\x1B" are not defined by POSIX awk.
        ANSI[k] = "\033[" v "m";
        # Add a background and bright (plus background) derivative for each color.
        if (v >= 30 && v <= 37) {
            ANSI[k "bg"] = "\033[" (v + 10) "m";
            k = (k == "black") ? "gray" : ("bright" k);
            ANSI[k] = "\033[" (v + 60) "m";
            ANSI[k "bg"] = "\033[" (v + 60 + 10) "m";
        }
    }
}
# Return every key of the global `ANSI` array except "OFF", sorted ascending
# by string comparison and joined with single spaces ("" when there is none).
#
# Takes no arguments: every parameter from `__` onward is a local variable.
# `i` is declared local so that calling this function does not overwrite a
# caller's global loop index of the same name.
function ansi_keywords(__, n, keyword, codes, codes_str, i) {
    n = 0;
    for (keyword in ANSI) {
        if (keyword == "OFF") continue;
        # Insertion sort: shift larger entries up by one until the slot for
        # `keyword` is found.
        for (i = ++n; i > 1; i--) {
            if (keyword > codes[i-1]) break;
            codes[i] = codes[i-1];
        }
        codes[i] = keyword;
    }
    codes_str = "";
    for (i = 1; i <= n; i++) {
        codes_str = codes_str (i > 1 ? " " : "") codes[i];
    }
    return codes_str;
}
# Return the substring of `s` that starts at 1-based index `i` and spans at
# most `n` characters. When `n` is omitted the slice runs to the end of `s`.
# The result is "" when the (clamped) length is below one or `i` lies past
# the end of `s`. Parameters from `__` onward are local variables.
function slicestr(s, i, n, __, len) {
    len = length(s);
    # An omitted length means "everything that is left".
    if (n == "") n = len;
    # Clamp the length so the slice never reaches past the end of the string.
    if (n > len + 1 - i) n = len + 1 - i;
    return (n >= 1 && i <= len) ? substr(s, i, n) : "";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment