# Serial method uses 40-50% all available CPU prior to `sort` step. Assuming linear scaling, best we could achieve is halving the time.
# Grand Assertion: this pipeline actually gives correct answer! This is a very complex way to calculate this, SQL would be so much easier...

# cut -d ' ' -f 2,3,5                                                       - Take fields 2, 3, and 5 (store, timestamp, transaction)
# tr -d '[A-Za-z\"/\- ]'					            - Strip out all the characters and spaces, to just leave the store number, timestamp, and commas to represent the number of items
# awk '{print (substr($1,1,5)"-"substr($1,6,6)), length(substr($1,14))+1}'  - Split the string at the store, yearmo boundary, then count number of commas + 1 (since 3 commas = 4 items) 
# awk '{a[$1]+=$2;}END{for(i in a)print i" "a[i];}'	                    - Sum by store-yearmo combo
# sort									    - Sort such that the store number is together, then the month

time cut -d ' ' -f 2,3,5 transactions.csv | tr -d '[A-Za-z\"/\- ]' | awk '{print (substr($1,1,5)"-"substr($1,6,6)), length(substr($1,14))+1}' | awk '{a[$1]+=$2;}END{for(i in a)print i" "a[i];}' | sort

real	14m5.657s

# Parallelize the substring awk step
# Actually lowers processor utilization!

awksub2 () { awk '{print (substr($1,1,5)"-"substr($1,6,6)), length(substr($1,14))+1}';}
export -f awksub2
time cut -d ' ' -f 2,3,5 transactions.csv | tr -d '[A-Za-z\"/\- ]' | parallel --pipe -m  awksub2 | awk '{a[$1]+=$2;}END{for(i in a)print i" "a[i];}' | sort

real	19m27.407s (worse!)

# Move parallel to aggregation step

awksub3 () { awk '{a[$1]+=$2;}END{for(i in a)print i" "a[i];}';}
export -f awksub3
time cut -d ' ' -f 2,3,5 transactions.csv | tr -d '[A-Za-z\"/\- ]' | awk '{print (substr($1,1,5)"-"substr($1,6,6)), length(substr($1,14))+1}' | parallel --pipe awksub3  | awksub3 | sort

real	19m24.851s (Same as other parallel run)