#!/usr/bin/env bash
# jobel-code/gist_aggregate_multiple_tsv_files_into_one.bash
#
## gist_aggregate_multiple_tsv_files_into_one.bash
#
# Concatenate every *.tsv file in the current directory into a single file,
# keeping the header row (first line) of the first file only.
#
# Usage:
#   bash gist_aggregate_multiple_tsv_files_into_one.bash [OUTPUT_FILE]
#
# OUTPUT_FILE defaults to aggregated_file.aqs. The .aqs extension is just a
# label for tab-separated values; you can choose your own extension.
#
# DEBUG ONLY: to aggregate the files while keeping the header of EVERY file,
# run this instead:
#   cat *.tsv > aggregated_files_with_headers.csv
#
# SOURCE: https://unix.stackexchange.com/questions/60577/concatenate-multiple-files-with-same-header

#######################################
# Append the contents of all ./*.tsv files to one output file, dropping the
# first line of every input file except the first one.
# Arguments:
#   $1 - output file (optional, default: aggregated_file.aqs)
# Outputs:
#   Appends the aggregated rows to the output file; diagnostics go to stderr.
# Returns:
#   1 if no .tsv file exists in the current directory, otherwise awk's
#   exit status.
#######################################
aggregate_tsv_files() {
  local output=${1:-aggregated_file.aqs}
  local -a inputs=()
  local candidate

  # The "./" prefix keeps awk from reading a name such as "x=y.tsv" as a
  # variable assignment (or "-x.tsv" as an option) instead of a file.
  # The -f test also discards the literal pattern that the shell leaves
  # behind when nothing matches.
  for candidate in ./*.tsv; do
    if [[ -f "$candidate" ]]; then
      inputs+=("$candidate")
    fi
  done

  if (( ${#inputs[@]} == 0 )); then
    printf 'aggregate_tsv_files: no .tsv files found in %s\n' "$PWD" >&2
    return 1
  fi

  # How the awk program works:
  #   FNR == 1 && NR != 1 { next }
  #       FNR == 1 is true on the first line of each input file; NR != 1
  #       excludes the very first line read across all files. Together they
  #       skip the header of every file but the first.
  #   { print }
  #       prints every line that was not skipped.
  #
  # NOTE: the output is opened with >>, so rows are APPENDED. Remove a stale
  # output file before re-running, otherwise it will hold the data twice.
  awk 'FNR == 1 && NR != 1 { next } { print }' "${inputs[@]}" >> "$output"
}

aggregate_tsv_files "$@"