MrDOS/mock.awk

## mock.awk
# In retaliation to https://blog.theodo.fr/2018/03/regex-warrior/.
#
# Usage:
#
#     awk -v FS=';' -v OFS=';' -f mock.awk <mock.csv

# Discard data rows whose 7th field is the literal string "false". “NR” is the
# running record number, so record 1 is the header row; it is never tested and
# always falls through to the rules below.
NR > 1 {
    if ($7 == "false")
        next
}

# awk patterns can check for regex matches on a field. We'll use that to remove
# users with old IDs: data rows whose 2nd field is exactly five digits.
#
# The digit class is written out five times rather than as the interval
# expression [0-9]{5}. Some awk implementations (older mawk and gawk releases)
# do not implement interval expressions, so with {5} this filter would never
# match a five-digit ID there.
NR > 1 && $2 ~ /^[0-9][0-9][0-9][0-9][0-9]$/ {
    next;
}

# No pattern means “just do this action for each record” (header included).
{
    # To remove the 6th column (“admin”), shift all subsequent column values
    # to the left by one, then drop the now-duplicated last field.
    #
    # Records with fewer than six fields have no 6th column and are left
    # untouched: decrementing NF unconditionally would delete one of their real
    # columns, and on an empty line it would set NF to -1, which gawk treats as
    # a fatal error.
    if (NF >= 6) {
        for (i = 6; i < NF; i++) {
            $i = $(i + 1);
        }
        # Decrement the number of fields so `print` emits one column fewer.
        NF--;
    }

    # Make the “url” field (column 5) relative by cutting everything up to and
    # including the FIRST "scheme://host/" prefix. match() reports the
    # leftmost occurrence, so a URL that embeds another URL in its path or
    # query keeps that part intact (a greedy leading ".*" would instead strip
    # up to the last "://"). The forward slashes are escaped because "/" also
    # delimits the regex literal. If the field has no such prefix it is left
    # unchanged.
    if (match($5, /:\/\/[^/]+\//)) {
        $5 = substr($5, RSTART + RLENGTH);
    }
}

# The single name column becomes two columns. Rather than shifting the later
# columns to the right to make room for a new field, we take the shortcut the
# sed original took: put the output field separator inside field 1, so the
# printed record simply contains one more separator.
#
# This is done by two rules: this one rewrites the header, and a separate
# rule handles the data rows.
#
# The separator is always taken from the `OFS` variable instead of being
# written as a literal semicolon, so the output separator can be changed
# later without touching this code.
NR == 1 {
    $1 = ("first name" OFS "last name")
}

# Data rows: split the name at its first space by putting OFS in its place.
# index()/substr() are used instead of sub() because sub() gives "&" and
# backslashes in its replacement string a special meaning, so an OFS
# containing either character would not be inserted literally. A name with no
# space is left unchanged.
NR > 1 {
    space_at = index($1, " ");
    if (space_at > 0) {
        $1 = substr($1, 1, space_at - 1) OFS substr($1, space_at + 1);
    }
}

# Write out the current record. Records that hit `next` in an earlier rule
# never get here.
{
    print $0
}
	# In retaliation to https://blog.theodo.fr/2018/03/regex-warrior/.
	#
	# Usage:
	#
	# awk -v FS=';' -v OFS=';' -f mock.awk <mock.csv

	# Discard data rows whose 7th field is the literal string "false". “NR” is the
	# running record number, so record 1 is the header row; it is never tested and
	# always falls through to the rules below.
	NR > 1 {
	    if ($7 == "false")
	        next
	}

	# awk patterns can check for regex matches on a field. We'll use that to remove
	# users with old IDs: data rows whose 2nd field is exactly five digits.
	#
	# The digit class is written out five times rather than as the interval
	# expression [0-9]{5}. Some awk implementations (older mawk and gawk releases)
	# do not implement interval expressions, so with {5} this filter would never
	# match a five-digit ID there.
	NR > 1 && $2 ~ /^[0-9][0-9][0-9][0-9][0-9]$/ {
	    next;
	}

	# No pattern means “just do this action for each record” (header included).
	{
	    # To remove the 6th column (“admin”), shift all subsequent column values
	    # to the left by one, then drop the now-duplicated last field.
	    #
	    # Records with fewer than six fields have no 6th column and are left
	    # untouched: decrementing NF unconditionally would delete one of their real
	    # columns, and on an empty line it would set NF to -1, which gawk treats as
	    # a fatal error.
	    if (NF >= 6) {
	        for (i = 6; i < NF; i++) {
	            $i = $(i + 1);
	        }
	        # Decrement the number of fields so `print` emits one column fewer.
	        NF--;
	    }

	    # Make the “url” field (column 5) relative by cutting everything up to and
	    # including the FIRST "scheme://host/" prefix. match() reports the
	    # leftmost occurrence, so a URL that embeds another URL in its path or
	    # query keeps that part intact (a greedy leading ".*" would instead strip
	    # up to the last "://"). The forward slashes are escaped because "/" also
	    # delimits the regex literal. If the field has no such prefix it is left
	    # unchanged.
	    if (match($5, /:\/\/[^/]+\//)) {
	        $5 = substr($5, RSTART + RLENGTH);
	    }
	}

	# The single name column becomes two columns. Rather than shifting the later
	# columns to the right to make room for a new field, we take the shortcut the
	# sed original took: put the output field separator inside field 1, so the
	# printed record simply contains one more separator.
	#
	# This is done by two rules: this one rewrites the header, and a separate
	# rule handles the data rows.
	#
	# The separator is always taken from the `OFS` variable instead of being
	# written as a literal semicolon, so the output separator can be changed
	# later without touching this code.
	NR == 1 {
	    $1 = ("first name" OFS "last name")
	}

	# Data rows: split the name at its first space by putting OFS in its place.
	# index()/substr() are used instead of sub() because sub() gives "&" and
	# backslashes in its replacement string a special meaning, so an OFS
	# containing either character would not be inserted literally. A name with no
	# space is left unchanged.
	NR > 1 {
	    space_at = index($1, " ");
	    if (space_at > 0) {
	        $1 = substr($1, 1, space_at - 1) OFS substr($1, space_at + 1);
	    }
	}

	# Write out the current record. Records that hit `next` in an earlier rule
	# never get here.
	{
	    print $0
	}