Skip to content

Instantly share code, notes, and snippets.

@brunerd
Last active December 18, 2020 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brunerd/e2a129bbdf31a2ef44aef3e7d101a5e4 to your computer and use it in GitHub Desktop.
Save brunerd/e2a129bbdf31a2ef44aef3e7d101a5e4 to your computer and use it in GitHub Desktop.
Minified, functionalized version of Lorance Stinson's csv2csv shell utility
#!/bin/sh
# csv2csv.function.min
# csv2csv by Lorance Stinson, 2007
# This file is in the public domain.
# For more information email LoranceStinson+csv@gmail.com.
# Or see http://lorance.freeshell.org/csvutils/
# Additions by Joel Bruner, 2020 (https://github.com/brunerd)
#csv2csv function minified
function csv2csv { IFS=$' \t\n'; local NL=$'\n'; local TAB=$'\t';local CSV_ESCAPE=${CSV_ESCAPE:='"'}; local CSV_SEPARATOR=${CSV_SEPARATOR:=","}; local CSV_QUOTE=${CSV_QUOTE:='"'}; local OCSV_ESCAPE=${CSV_ESCAPE:='"'}; local OCSV_SEPARATOR=${CSV_SEPARATOR:=","}; local OCSV_QUOTE=${CSV_QUOTE:='"'}; local FILE="-"; local NL_TEXT=""; local NUM_COLS=""; local OUTPUT=""; local QUOTE_LEVEL="0"; local SKIP_ROWS="0"; local TAB_TEXT=""; while [ "$#" -gt 0 ] ; do case "$1" in -c) [ "$2" ] && local COL_LIST="$2" || usage="$1 requires an argument."; shift 2;; -e) [ "$2" ] && CSV_ESCAPE="$2" || usage="$1 requires an argument."; shift 2;; -E) [ "$2" ] && OCSV_ESCAPE="$2" || usage="$1 requires an argument."; shift 2;; -h) usage=" "; shift 1;; -l) [ "$2" ] && QUOTE_LEVEL="$2" || usage="$1 requires an argument."; shift 2;; -N) [ "$2" ] && NL_TEXT="$2" || usage="$1 requires an argument."; shift 2;; -n) [ "$2" ] && NUM_COLS="$2" || usage="$1 requires an argument."; shift 2;; -o) [ "$2" ] && OUTPUT="$2" || usage="$1 requires an argument."; shift 2;; -q) [ "$2" ] && CSV_QUOTE="$2" || usage="$1 requires an argument."; shift 2;; -Q) [ "$2" ] && OCSV_QUOTE="$2" || usage="$1 requires an argument."; shift 2;; -r) [ "$2" ] && SKIP_ROWS="$2" || usage="$1 requires an argument."; shift 2;; -s) [ "$2" ] && CSV_SEPARATOR="$2" || usage="$1 requires an argument."; shift 2;; -S) [ "$2" ] && OCSV_SEPARATOR="$2" || usage="$1 requires an argument."; shift 2;; -T) [ "$2" ] && TAB_TEXT="$2" || usage="$1 requires an argument."; shift 2;; -z) local CSV_MAWK_BUG="Y"; shift;; --) [ "$2" ] && FILE="$2"; break;; -) FILE="-"; shift;; -*) usage="Invalid option $1";; *) FILE="$1"; shift;; esac; done; [ "X$FILE" != "X-" -a ! -f "$FILE" -a ! -r "$FILE" ] && usage="Unable to read the file ($FILE)."; if [ -n "${usage}" ] ; then cat<<EOHELP
${usage}${NL}Usage: `basename $0` [OPTIONS] [INPUT]${NL}Converts a CSV file to a CSV file. This is useful for changing the CSV${NL}format or replacing newlines.${NL}Options:${NL}${TAB}-c <LIST> Output the columns specified by LIST. The list should${NL}${TAB}${TAB}${TAB}${TAB}contain the column numbers to output, starting from 1,${NL}${TAB}${TAB}${TAB}${TAB}separated by commas. Ranges are not allowed. Overrides -n.${NL}${TAB}-e <ESCAPE> The input CSV escape character. Defaults to $CSV_ESCAPE.${NL}${TAB}-E <ESCAPE> The output CSV escape character. Defaults to $OCSV_ESCAPE.${NL}${TAB}-h This text.${NL}${TAB}-l <LEVEL> Quote level for output. Defaults to 0.${NL}${TAB}${TAB}${TAB} -1 = Do not quote any fields. Use with caution.${NL}${TAB}${TAB}${TAB}${TAB}0 = Only strings that require it are quoted.${NL}${TAB}${TAB}${TAB}${TAB}1 = All non-number fields are quoted. -?[0-9.]${NL}${TAB}${TAB}${TAB}${TAB}2 = All fields are quoted except empty ones.${NL}${TAB}${TAB}${TAB}${TAB}3 = All fields are quoted including empty ones.${NL}${TAB}-N <TEXT> Replace embedded newlines with TEXT. Defaults to a newline.${NL}${TAB}-n <NUMBER> Output exactly NUMBER of columns. If a row has more columns${NL}${TAB}${TAB}${TAB}${TAB}the extra will be removed and if it has fewer extra blank${NL}${TAB}${TAB}${TAB}${TAB}columns will be added. Ignored when -c is used.${NL}${TAB}-o <FILE> Write the results to FILE.${NL}${TAB}-q <QUOTE> The input CSV quote character. Defaults to $CSV_ESCAPE.${NL}${TAB}-Q <QUOTE> The output CSV quote character. Defaults to $OCSV_ESCAPE.${NL}${TAB}-r <NUM> Skip NUM rows of the CSV file or input.${NL}${TAB}-s <SEP> The input CSV field separator. Defaults to $CSV_SEPARATOR.${NL}${TAB}-S <SEP> The output CSV field separator. Defaults to $OCSV_SEPARATOR.${NL}${TAB}-T <TEXT> Replace embedded tabs with TEXT. Defaults to a tab.${NL}${NL}Replacement for -N and -T are not checked for the output escape character.${NL}
EOHELP
exit 1; fi; if [ "$OUTPUT" ] ; then exec 1>$OUTPUT; fi; awk -v csv_sep="$CSV_SEPARATOR" -v csv_quote="$CSV_QUOTE" -v csv_escape="$CSV_ESCAPE" -v ocsv_sep="$OCSV_SEPARATOR" -v ocsv_quote="$OCSV_QUOTE" -v ocsv_escape="$OCSV_ESCAPE" -v col_list="$COL_LIST" -v quotelevel="$QUOTE_LEVEL" -v nltext="$NL_TEXT" -v numcols="$NUM_COLS" -v row_attr="$ROW_ATTR" -v skip="$SKIP_ROWS" -v tabtext="$TAB_TEXT" \-v csv_mawk_bug="$CSV_MAWK_BUG" 'function parse_csv(string,csv,sep,quote,escape,newline,fields,instr,pos,strtrim) { if (length(string) == 0) return 0;string = sep string; fields = 0; while (length(string) > 0) { strtrim = 0; if (substr(string,2,1) == quote) {instr = 1; pos = 2; strtrim = 1; do {pos++;if (pos != length(string) && substr(string,pos,1) == escape && (substr(string,pos + 1,1) == quote || substr(string,pos + 1,1) == escape)) { string = substr(string,1,pos - 1) substr(string,pos + 1); } else if (substr(string,pos,1) == quote) { instr = 0;} else if (newline && pos >= length(string)) { if (getline == -1) { csverr = "Unable to read the next line."; return -1; }; string = string newline $0;}} while (pos < length(string) && instr); if (instr) {csverr = "Missing end quote.";return -1;}} else {if (length(string) == 1 || substr(string,2,1) == sep) { csv[fields] = ""; fields++;if (length(string) == 1){ return fields; string = substr(string,2); continue;}}; pos = index(substr(string,2),sep);if (pos == 0) { csv[fields] = substr(string,2); fields++; return fields; }};if ((pos != length(string) && substr(string,pos + 1,1) != sep)) { csverr = "Missing separator.";return -1;}; csv[fields] = substr(string,2 + strtrim,pos - (1 + strtrim * 2)); fields++; string = substr(string,pos + 1); } return fields; }; BEGIN { if (ocsv_escape == "\\") escapere = "\\\\"; else escapere = ocsv_escape;if (col_list) cols_count = split(col_list,cols,/,/)}{ line++; fields = parse_csv($0,csv,csv_sep,csv_quote,csv_escape,"\n");if (fields < 0 ) { print "ERROR: " csverr " at line # "line | "cat 1>&2";next;} if (line <= skip) next; if (numcols){ if (fields < numcols) { for (col = fields; col < numcols; col++) csv[col] = "";}fields = numcols;}if (col_list) { for (col = 0; col < fields; col++) old[col] = csv[col]; for (col = 0; col < cols_count; col++) { field = cols[col + 1] - 1; if (field > fields - 1) old[field] = ""; csv[col] = old[field]}fields = cols_count} for (col = 0; col < fields; col++) { if (length(csv[col]) != 0) { if (((csv[col] ~ ocsv_sep) || (csv[col] ~ escapere) || (csv[col] ~ ocsv_quote) || (csv[col] ~ /\n/) || (csv[col] ~ /\t/) || (quotelevel >= 2) || ((quotelevel == 1) && (csv[col] !~ /^-*[0-9.][0-9.]*$/))) && (quotelevel != -1)) { if (csv_mawk_bug && ocsv_escape == "\\") { gsub (escapere,"\\\\\\",csv[col]) } else {gsub (escapere,ocsv_escape ocsv_escape,csv[col])}; if (ocsv_escape != ocsv_quote) {gsub (ocsv_quote,ocsv_escape ocsv_quote,csv[col]);}; if (nltext) gsub (/\n/,nltext,csv[col]); if (tabtext) gsub (/\t/,tabtext,csv[col]); printf "%s%s%s", ocsv_quote, csv[col], ocsv_quote; } else printf "%s", csv[col]; } else if (quotelevel == 3) printf "%s%s", ocsv_quote, ocsv_quote; if (col < fields - 1) printf "%s", ocsv_sep} printf "\n" }' "$FILE"
}
#for standalone use, invoke the function
IFS=$'\t\n'; csv2csv ${@}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment