Skip to content

Instantly share code, notes, and snippets.

@wookietreiber
Created August 5, 2020 10:08
Show Gist options
  • Save wookietreiber/69f1c9954371eaa783d1a47c362c8cee to your computer and use it in GitHub Desktop.
Save wookietreiber/69f1c9954371eaa783d1a47c362c8cee to your computer and use it in GitHub Desktop.
Search for a pattern in a column of XSV (delimiter-separated) input.
#!/bin/bash

# Strict mode:
#   errexit  - abort on any unhandled non-zero exit status
#   pipefail - a pipeline fails if any of its stages fails
#   noglob   - never expand arguments such as '*' as file name patterns
#   nounset  - referencing an unset variable is an error
# The option list must NOT end in a line continuation: otherwise the next
# statement is swallowed by `set` and becomes a positional parameter instead
# of being executed.
set \
  -o errexit \
  -o pipefail \
  -o noglob \
  -o nounset

# name of this script without its .sh suffix, used as prefix in messages
app=$(basename "$0" .sh)
# -----------------------------------------------------------------------------
# usage
# -----------------------------------------------------------------------------
# default options
default_delimiter=','

# Print the help text to stdout.
# Globals: app (read) - script name shown in the synopsis and the example
#          default_delimiter (read) - shown as the default of --delim
usage() {
  # one argument per output line; '%s\n' is re-applied to every argument
  printf '%s\n' \
    "$app" \
    'USAGE' \
    "$app [options] column=needle..." \
    'DESCRIPTION' \
    'Find needles in columns in XSV formatted input.' \
    'Note: Assumes that XSV formatted input contains a header!' \
    'ARGUMENTS' \
    'column=needle search for needle in column' \
    '(exact matching!)' \
    'OPTIONS' \
    '--delim=x delimiter aka (field) separator' \
    "default: '$default_delimiter'" \
    '-?, --help shows this help text' \
    'EXAMPLES' \
    'check if Spectrum Scale filesystem is write-mounted' \
    "mmlsmount gpfs95 -L -Y | $app --delim=: nodeName=node001 mountMode=RW"
}
# -----------------------------------------------------------------------------
# options
# -----------------------------------------------------------------------------
# Consume leading options from the positional parameters. Everything that is
# left in "$@" afterwards is treated as column=needle arguments.
delimiter=$default_delimiter

while (( $# > 0 )); do
  case "$1" in
    --delim=*)
      # keep only the part after the option name
      delimiter=${1##--delim=}
      shift
      ;;
    -\? | --help)
      usage
      exit
      ;;
    --)
      # begin of positional arguments (explicit): drop the marker itself
      shift
      break
      ;;
    -*)
      echo "$app: unrecognized option: $1" >&2
      usage >&2
      exit 2
      ;;
    *)
      # begin of positional arguments (implicit): leave "$1" in place
      break
      ;;
  esac
done
# -----------------------------------------------------------------------------
# arguments
# -----------------------------------------------------------------------------
# Split every remaining argument at its FIRST '=' into a column name and a
# needle. columns[i] and needles[i] belong together.
columns=()
needles=()

for arg in "$@"; do
  case "$arg" in
    [!=]*=*)
      # Parameter expansion instead of `read`: keeps the needle byte-exact
      # (trailing '=' and embedded newlines survive) and allows an empty
      # needle such as "column=".
      columns+=("${arg%%=*}")
      needles+=("${arg#*=}")
      ;;
    *)
      # no '=' at all, or an empty column name
      echo "$app: argument format needs to be: column=needle" >&2
      usage >&2
      exit 2
      ;;
  esac
done

# Bail out if there are no arguments. Count the elements instead of testing
# the joined string: "${columns[*]}" on an empty array is an "unbound
# variable" error under nounset on bash < 4.4, and a lone empty needle must
# not be mistaken for "nothing given".
if (( ${#columns[@]} == 0 )); then
  echo "$app: no arguments given" >&2
  usage >&2
  exit 2
fi
# -----------------------------------------------------------------------------
# app
# -----------------------------------------------------------------------------
# Scan XSV input on stdin for the first data line on which every requested
# column is exactly equal to its needle. The first input line is the header.
# Globals: delimiter (read) - field separator handed to awk -F
#          columns   (read) - array of column names
#          needles   (read) - array of needles, parallel to columns
# Outputs: a diagnostic on stderr if a requested column is not in the header
# Returns: 0 if a matching line was found, 1 otherwise
xsv_search() {
  # Join the lists with the ASCII unit separator instead of a space, so that
  # column names and needles may themselves contain spaces or be empty.
  local IFS=$'\037'

  # Hand the lists over through the environment: unlike `awk -v`, values in
  # ENVIRON are not subject to backslash-escape processing.
  XSV_COLUMNS="${columns[*]}" XSV_NEEDLES="${needles[*]}" \
    awk -F "$delimiter" '
    BEGIN {
      # found is our exit status: 0 = match, 1 = no match
      found = 1

      # split columns and needles (again), keeping their pairing by index
      n = split(ENVIRON["XSV_COLUMNS"], columns_a, "\037")
      split(ENVIRON["XSV_NEEDLES"], needles_a, "\037")

      # build up searches: columns are the keys, needles are the values
      for (i = 1; i <= n; i++) {
        searches[columns_a[i]] = needles_a[i]
      }
    }

    # first line has header
    NR == 1 {
      # associate header name with column number
      for (i = 1; i <= NF; i++) {
        header[$i] = i
      }

      # A column missing from the header can never match. Without this
      # check the lookup would yield field number 0, i.e. the whole line.
      for (column in searches) {
        if (!(column in header)) {
          print "column not found in header: " column > "/dev/stderr"
          exit
        }
      }

      next
    }

    # for all lines but header ...
    {
      matched = 1

      # ... a line matches only if none of the searches disagrees
      for (column in searches) {
        # Appending "" forces a string comparison; otherwise values that
        # look numeric compare as numbers and 1 would equal 1.0.
        if (($(header[column]) "") != (searches[column] "")) {
          matched = 0
          break
        }
      }

      if (matched) {
        # ... if found stop processing
        found = 0
        exit
      }
    }

    END {
      exit found
    }
  '
}

# last command of the script: its status is the exit status of the script
xsv_search
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment