Skip to content

Instantly share code, notes, and snippets.

@gibson042
Last active March 10, 2022 15:28
Show Gist options
  • Save gibson042/4f40d357f66bb99c8777181ab2beb2bc to your computer and use it in GitHub Desktop.
Save gibson042/4f40d357f66bb99c8777181ab2beb2bc to your computer and use it in GitHub Desktop.
highlight.awk - ANSI-format matching input (e.g. to supplement grep)
#!/usr/bin/awk -f
BEGIN {
  # Build the help text printed by usage(). Both templates carry one %s
  # placeholder that receives the command name.
  USAGE="Usage: %s rule=<awk ERE pattern>/<ANSI SGR format>[.<extra format>]... [rule=...]...";
  EXAMPLE="Example: </etc/resolv.conf %s rule='#.*'/dim rule='([0-9]+[.]){3}[0-9]+'/bold.red";
  # Command name comes from the `_` environment variable (typically set by the
  # invoking shell); fall back to a literal "$0" when it is empty or unset.
  CMD = ENVIRON["_"] ? ENVIRON["_"] : "$0";
  # Fill both placeholders in one pass: usage line, newline, example line.
  USAGE = sprintf(USAGE "\n" EXAMPLE, CMD, CMD);
}
# Report a usage problem on stderr and terminate the program.
#   msg:  optional error text, printed with an "[ERROR] " prefix when non-empty
#   code: optional exit status; when omitted or zero the status is 64
function usage(msg, code) {
  if (msg != "") {
    print "[ERROR] " msg > "/dev/stderr";
  }
  print USAGE > "/dev/stderr";
  # 64 is EX_USAGE, cf. http://man.openbsd.org/sysexits
  exit (code ? code : 64);
}
BEGIN {
  init_ansi();
  # Process arguments into global configuration:
  # `PATTERNS` and `PATTERNS_MID` and `FORMATS`, each having `N` items.
  #   PATTERNS[n]      the rule's pattern exactly as given
  #   PATTERNS_MID[n]  the same pattern with every bare `^` replaced by `$`
  #   FORMATS[n]       concatenated SGR sequences for the rule's keywords
  N=0;
  rule_prefix="rule=";
  for(i=1; i<ARGC; i++) {
    arg=ARGV[i];
    # Skip an argument that identifies a file (i.e., doesn't match keyword=value syntax).
    if(!match(arg, "^[a-zA-Z_][[:alnum:]_]*=")) continue;
    # Process a rule argument.
    if(substr(arg, 1, length(rule_prefix))==rule_prefix) {
      # The LAST slash separates pattern from format keywords, so the pattern
      # itself may contain slashes.
      for(j=length(arg); j>0; j--) if(substr(arg, j, 1)=="/") break;
      # Reject both a missing slash and a slash with nothing after it; the
      # latter would otherwise register a rule with an empty format.
      if(j==0 || j==length(arg)) usage("missing keywords: " arg);
      N++;
      pattern=substr(arg, length(rule_prefix)+1, j-length(rule_prefix)-1);
      PATTERNS[N]=pattern;
      # Build the variant the main rule uses after the first match on a line,
      # when the remaining text no longer starts at the beginning of the line.
      # Walk the pattern token by token: bracket expressions and backslash
      # escapes (RLENGTH>1) are copied verbatim, while a bare `^` becomes `$`
      # so the anchor cannot match at the start of the remainder.
      PATTERNS_MID[N]="";
      while(match(pattern, /[[]([\\].|[^\\])*[]]|[\\].|\^/)) {
        if(RLENGTH>1) r=substr(pattern, RSTART, RLENGTH);
        else r="$";
        PATTERNS_MID[N]=PATTERNS_MID[N] substr(pattern, 1, RSTART-1) r;
        pattern=slicestr(pattern, RSTART+RLENGTH);
      }
      PATTERNS_MID[N]=PATTERNS_MID[N] pattern;
      # Translate each dot-separated keyword into its SGR sequence. Iterating
      # backwards while prepending keeps the sequences in the order given.
      for(k=split(slicestr(arg, j+1), formats, "."); k>=1; k--) {
        if(!(formats[k] in ANSI)) {
          err_template="unknown format: %s\nvalid formats: %s";
          usage(sprintf(err_template, formats[k], ansi_keywords()));
        }
        FORMATS[N]=ANSI[formats[k]] FORMATS[N];
      }
      # Blank the consumed argument so awk does not treat it as an operand.
      ARGV[i]="";
      continue;
    }
    # Any other keyword=value argument is not supported.
    usage("unknown argument: " arg);
  }
  # Require at least one formatting argument
  # so absence leads to iterative education.
  if(N==0) usage(sprintf("missing argument %s...", rule_prefix));
}
{
  # Highlight one input record: repeatedly pick the earliest non-empty match
  # among all rules (ties go to the lowest-numbered rule), wrap it in that
  # rule's format, and continue with whatever text follows the match.
  tail=$0;
  painted="";
  hits=0;
  while(tail!="") {
    tail_len=length(tail);
    # Sentinel position one past the end means "nothing found yet".
    best_at=tail_len+1;
    for(r=1; r<=N; r++) {
      # After the first hit the remainder is mid-line, so use the variant
      # pattern prepared for that situation.
      re=hits ? PATTERNS_MID[r] : PATTERNS[r];
      if(!match(tail, re)) continue;
      # Ignore empty matches and matches that are not strictly earlier.
      if(RLENGTH<=0 || RSTART>=best_at) continue;
      best_at=RSTART;
      best_len=RLENGTH;
      best_fmt=FORMATS[r];
    }
    # Sentinel untouched: no rule matched the remainder.
    if(best_at>tail_len) break;
    # Emit the unmatched prefix, then the match wrapped in format and reset.
    painted=painted slicestr(tail, 1, best_at-1) best_fmt slicestr(tail, best_at, best_len) ANSI["OFF"];
    tail=slicestr(tail, best_at+best_len);
    hits++;
  }
  print painted tail;
}
# Define a global `ANSI` mapping each format keyword usable in a highlight rule
# to its SGR control sequence, plus "OFF" (SGR 0), which the main rule appends
# after every highlighted match.
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR
# Every parameter is a local; callers pass no arguments.
function init_ansi(__, CODES, n, kv, k, v, sgr) {
  CODES="bold=1 dim=2 italic=3 underline=4 invert=7";
  CODES=CODES " black=30 red=31 green=32 yellow=33 blue=34 magenta=35 cyan=36 white=37";
  # `sgr` is declared local so this scratch array does not leak into globals.
  n=split("OFF=0 " CODES, sgr, " ");
  for(; n>=1; n--) {
    split(sgr[n], kv, "="); k=kv[1]; v=kv[2];
    # ESC is written as octal "\033": POSIX awk defines octal escapes only,
    # while "\x1B" hex escapes are an extension.
    ANSI[k]="\033[" v "m";
    # Add a background and bright (plus background) derivative for each color.
    if(v>=30 && v<=37) {
      ANSI[k "bg"]="\033[" (v+10) "m";
      # The bright form of black is exposed as "gray"; every other color
      # gets a "bright" prefix.
      if(k=="black") k="gray"; else k="bright" k;
      ANSI[k]="\033[" (v+60) "m";
      ANSI[k "bg"]="\033[" (v+60+10) "m";
    }
  }
}
# Return every key of the global `ANSI` array except "OFF", sorted ascending by
# string comparison and joined with single spaces (empty string when none).
# Every parameter is a local; callers pass no arguments. `i` is declared local
# so this function does not clobber the global `i` used by its caller's loop.
function ansi_keywords(__, n, keyword, codes, codes_str, i, sep) {
  n=0;
  for(keyword in ANSI) {
    if(keyword=="OFF") continue;
    # Insertion sort: shift larger entries up one slot until the position
    # for `keyword` is found.
    for(i=++n; i>1; i--) {
      if(keyword > codes[i-1]) break;
      codes[i]=codes[i-1];
    }
    codes[i]=keyword;
  }
  # Join with a separator that is empty for the first item only, so no
  # leading space needs to be trimmed afterwards.
  codes_str="";
  sep="";
  for(i=1; i<=n; i++) {
    codes_str=codes_str sep codes[i];
    sep=" ";
  }
  return codes_str;
}
# Return up to `n` characters of `s` starting at 1-based position `i`.
# An omitted `n` means "through the end of the string". The result is ""
# when the clamped count is below one or `i` lies past the end of `s`.
# `__` and `total` are locals; callers pass at most three arguments.
function slicestr(s, i, n, __, total) {
  total=length(s);
  if(n=="") n=total;
  # Clamp the count so the slice never runs past the last character.
  if(i+n>total+1) n=total+1-i;
  return (n>=1 && i<=total) ? substr(s,i,n) : "";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment