Last active
April 1, 2022 15:25
-
-
Save katychuang/cbf95ab8d78d7c154568999b255cbbf4 to your computer and use it in GitHub Desktop.
This example sorts an input file by column using bubble sort to swap lines into the correct ascending order by a specified field.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This program sorts input by a specified column.
# Author: Katherine Chuang (@katychuang on GitHub)
# Example: awk -v column=2 -f prog.awk data.csv
#
# Program steps:
# 1. Read the input and store each line's fields in an array of arrays.
# 2. Sort the rows by the specified column (the user-supplied variable `column`).
# 3. Print the sorted version.
# Validate the sort column supplied with `-v column=N` before any input is read.
BEGIN {
    if (!column) {
        # Unset, empty, or zero: fall back to the first column.
        print "-----> no column value defined, setting to default 1"
        column = 1
    } else if (column !~ /^[0-9]+$/) {
        # Anything other than a positive integer (e.g. "abc", "-3", "2.5")
        # would address a field that is never stored, leaving the input
        # silently unsorted, so fall back to the first column instead.
        print "-----> invalid column value '" column "', setting to default 1"
        column = 1
    }
    # Force a numeric value so that e.g. "02" is used as subscript 2.
    column += 0
    print "_Input File, read in line by line_"
}
# Reads every non-blank record and stores its fields as one row of the
# array of arrays `a`.
#
# Rows are keyed by the running counter `nrows` rather than FNR so that
#   - several input files append rows instead of overwriting each other
#     (FNR restarts at 1 for every file), and
#   - blank records, which have no fields and therefore would not build a
#     row sub-array, do not leave a hole in the 1..nrows index range that
#     the sort walks.
# The printed label still shows FNR, the record's line number in its file.
NF > 0 {
    nrows++
    for (f = 1; f <= NF; f++) {
        a[nrows][f] = $f
    }
    show_row(a[nrows], ("Line " FNR))
}
# After all input has been read: sort the stored rows and print the result.
END {
    print "Done reading file, begin sorting"
    sort_by_col(a, column)
    # Report the user-selected column; `col` only exists as a function
    # parameter and is empty at this scope.
    printf("\n_Final result, sorted by col %s_\n", column)
    show_matrix(a, column)
}
# Sorts the rows of `arr` in place into ascending order of column `col`
# using bubble sort.
#
#   arr  array of arrays; rows are expected at indices 1..length(arr)
#   col  index of the field to compare within each row
#
# The names after the extra spaces in the parameter list are locals (awk's
# only way to declare them); callers pass just `arr` and `col`.
function sort_by_col(arr, col,    n, pass, left, right, swapped) {
    n = length(arr)                     # row count does not change while sorting
    for (pass = 1; pass < n; pass++) {
        swapped = 0
        # After each pass the largest remaining row has bubbled to the end,
        # so the unsorted prefix shrinks by one.
        for (left = 1; left <= n - pass; left++) {
            right = left + 1
            if (arr[left][col] > arr[right][col]) {
                swap_rows(arr[left], arr[right])
                swapped = 1
            }
        }
        # A pass without swaps means the rows are already in order.
        if (!swapped)
            break
    }
}
# Swaps the complete contents of two rows (arrays) in place.
#
#   A, B  arrays whose elements are exchanged; they may have different
#         numbers of elements or different index sets
#
# `k` and `held` are locals. `held` starts empty on every call, so no
# values from a previous swap can leak into this one.
function swap_rows(A, B,    k, held) {
    # Set A's contents aside.
    for (k in A)
        held[k] = A[k]
    # Clear each target before copying so indices that exist on only one
    # side move across instead of lingering on the wrong row.
    delete A
    for (k in B)
        A[k] = B[k]
    delete B
    for (k in held)
        B[k] = held[k]
}
# Prints every row of `matrix`, one per line, labelled "Line <index>".
#
#   matrix  array of arrays; rows are expected at indices 1..length(matrix)
#   col     unused; kept so existing callers keep working
#
# Rows are visited by ascending index. `for (i in matrix)` has no defined
# order in awk and could print the sorted rows out of order.
function show_matrix(matrix, col,    n, r) {
    n = length(matrix)
    for (r = 1; r <= n; r++) {
        show_row(matrix[r], "Line " r)
    }
}
# Prints the items of one array on a single line, prefixed by "<rowid>: ".
#
#   myarray  array whose elements are printed, each followed by a space
#   rowid    label printed before the elements
#
# Elements are printed in ascending numeric index order. Plain `for (e in
# array)` has no defined order, so gawk's PROCINFO["sorted_in"] is set for
# the duration of the loop and then restored to whatever it was before.
function show_row(myarray, rowid,    e, had_order, prev_order) {
    had_order = ("sorted_in" in PROCINFO)
    if (had_order)
        prev_order = PROCINFO["sorted_in"]
    PROCINFO["sorted_in"] = "@ind_num_asc"

    printf("%s: ", rowid)
    for (e in myarray)
        printf("%s ", myarray[e])
    printf("\n")

    # Leave the traversal order exactly as the caller had it.
    if (had_order)
        PROCINFO["sorted_in"] = prev_order
    else
        delete PROCINFO["sorted_in"]
}
# Note: arrays are passed by reference; see https://www.gnu.org/software/gawk/manual/html_node/Pass-By-Value_002fReference.html
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ten 10 | |
nine 9 | |
eight 8 | |
seven 7 | |
six 6 | |
five 5 | |
four 4 | |
three 3 | |
two 2 | |
one 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of running this make target with a user-specified value: make COL=2

# Default sort column, used when COL is not provided on the command line.
# The comment sits on its own line because a trailing comment after the value
# would leave the whitespace before '#' inside the variable.
COL ?= 1

# `default` names an action, not a file, so it must always run.
.PHONY: default
default:
	awk -v column=$(COL) -f sorted_records_by_col.awk data.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment