Skip to content

Instantly share code, notes, and snippets.

@fabiolimace
Last active July 19, 2024 12:59
Show Gist options
  • Save fabiolimace/7bfb971bae404b170e279019a2316f6b to your computer and use it in GitHub Desktop.
Save fabiolimace/7bfb971bae404b170e279019a2316f6b to your computer and use it in GitHub Desktop.
Convert a fixed-width file to a delimiter-separated file such as CSV and TSV.
#!/bin/bash
#
# Convert a fixed-width file to a delimiter-separated file such as CSV and TSV.
#
# Positional parameters:
#
# FIELD_WIDTHS: a space-separated list of the number of characters in each field, in order of occurrence.
# SEPARATOR: the character used to separate the fields in the output. The default separator is a tabulation.
# INPUT_FILE: the optional input file or `/dev/stdin` if no input file is provided.
# OUTPUT_FILE: the optional output file or `/dev/stdout` if no output file is provided.
#
# Usage:
#
# cat <<EOF | ./fixed-width-format-cut-paste.sh "5 5 5 5 5" "," "/dev/stdin" "/dev/stdout"
# 1111122222333334444455555
# 1234512345123451234512345
# EOF
# 11111,22222,33333,44444,55555
# 12345,12345,12345,12345,12345
#
# Read: https://en.wikipedia.org/wiki/Flat-file_database#Fixed-width_formats
#
#
# Cut every fixed-width column of the input into its own temporary file and
# glue the columns back together with `paste`, using SEPARATOR as delimiter.
#
# Arguments (same as the script's positional parameters):
#   $1 - FIELD_WIDTHS: whitespace-separated list of positive integers.
#   $2 - SEPARATOR: output delimiter; backslash escapes such as `\t` are
#        expanded. Defaults to a tabulation when the parameter is not given.
#   $3 - INPUT_FILE: file to convert; defaults to /dev/stdin.
#   $4 - OUTPUT_FILE: file to write; defaults to /dev/stdout.
#
# Returns: 0 on success, 2 on invalid or missing FIELD_WIDTHS, otherwise the
# failing status of mktemp/cat/cut/paste.
#
# The body is a subshell `( ... )` on purpose: `exit`, `set -f`, the EXIT trap
# and every variable stay confined to this function call.
#
fixed_width_cut_paste() (
  field_widths="${1-}"
  # '%b' expands escapes in the argument; the separator is never used as the
  # printf format itself, so a separator containing '%' is taken literally.
  separator="$(printf '%b' "${2-\t}")"
  input_file="${3:-/dev/stdin}"
  output_file="${4:-/dev/stdout}"

  # Word splitting of FIELD_WIDTHS is wanted, pathname expansion is not.
  set -f
  widths=()
  for width in ${field_widths}; do
    if [[ ! "${width}" =~ ^[0-9]+$ ]] || (( 10#${width} == 0 )); then
      printf '%s: invalid field width: %s\n' "${0##*/}" "${width}" >&2
      exit 2
    fi
    # Force base 10 so that a width such as "08" is not parsed as octal.
    widths+=("$(( 10#${width} ))")
  done

  if (( ${#widths[@]} == 0 )); then
    printf 'Usage: %s FIELD_WIDTHS [SEPARATOR] [INPUT_FILE] [OUTPUT_FILE]\n' "${0##*/}" >&2
    exit 2
  fi

  tempdir="$(mktemp -d)" || exit 1
  # Remove the scratch directory whenever this subshell exits.
  trap 'rm -rf -- "${tempdir}"' EXIT

  # The input is read once per field, so it is copied to a regular file first
  # (stdin could not be re-read).
  cat -- "${input_file}" > "${tempdir}/input-file" || exit 1

  # Keep the column files in an array: a `field-*` glob would sort them
  # lexically and put field-10 before field-2.
  field_files=()
  position=1
  for width in "${widths[@]}"; do
    field_file="${tempdir}/field-${#field_files[@]}"
    cut -c "${position}-$(( position + width - 1 ))" "${tempdir}/input-file" \
      > "${field_file}" || exit 1
    field_files+=("${field_file}")
    position=$(( position + width ))
  done

  paste -d "${separator}" "${field_files[@]}" > "${output_file}"
)

fixed_width_cut_paste "$@"
#!/usr/bin/awk -f
#
# Convert a fixed-width file to a delimiter-separated file such as CSV and TSV.
#
# Parameters:
#
# FIELD_WIDTHS: a space-separated list of the number of characters in each field, in order of occurrence.
# SEPARATOR: the character used to separate the fields in the output. The default separator is a tabulation.
#
# Usage:
#
# cat <<EOF | awk -v FIELD_WIDTHS="5 5 5 5 5" -v SEPARATOR="," -f fixed-width-format.awk
# 1111122222333334444455555
# 1234512345123451234512345
# EOF
# 11111,22222,33333,44444,55555
# 12345,12345,12345,12345,12345
#
# It may be useful for implementations of AWK other than GNU's AWK, which supports splitting fixed-width files into fields.
#
# Read: https://en.wikipedia.org/wiki/Flat-file_database#Fixed-width_formats
#
# Initialise the globals used by the rest of the program:
#   SEP                - output separator (SEPARATOR, or a tab when it is empty/unset)
#   FIELD_WIDTHS_ARRAY - the widths from FIELD_WIDTHS, indexed from 1
#   NUMBER_OF_FIELDS   - how many widths were given
BEGIN {
    # Compare against the empty string rather than testing truthiness,
    # otherwise `-v SEPARATOR=0` would be treated as false and replaced by a tab.
    SEP = (SEPARATOR == "") ? "\t" : SEPARATOR;
    # split() returns the element count; length(array) is not accepted by
    # older awk implementations.
    NUMBER_OF_FIELDS = split(FIELD_WIDTHS, FIELD_WIDTHS_ARRAY);
}
# Insert SEP into $0 after each fixed-width field.
#
# Reads the globals SEP, NUMBER_OF_FIELDS and FIELD_WIDTHS_ARRAY and rewrites
# $0 in place. A separator is only inserted when characters remain after the
# field, so short records get no trailing separators.
#
# The names after the gap in the parameter list are local variables (awk has
# no other way to declare them); callers pass no arguments.
function split_into_fields(    offset, i, sep_length) {
    offset = 0;
    sep_length = length(SEP);
    for (i = 1; i <= NUMBER_OF_FIELDS; i++) {
        offset = offset + FIELD_WIDTHS_ARRAY[i];
        if (offset < length($0)) {
            $0 = substr($0, 1, offset) SEP substr($0, offset + 1);
            # Skip over the separator just inserted, whatever its length.
            offset = offset + sep_length;
        }
    }
}
# Main rule, run for every input record: insert the separators, then emit it.
{
    split_into_fields()
    print
}
#!/bin/bash
#
# Convert a fixed-width file to a delimiter-separated file such as CSV and TSV.
#
# Positional parameters:
#
# FIELD_WIDTHS: a space-separated list of the number of characters in each field, in order of occurrence.
# SEPARATOR: the character used to separate the fields in the output. The default separator is a tabulation.
# INPUT_FILE: the optional file to be converted or `/dev/stdin` if no input file is provided.
#
# Usage:
#
# cat <<EOF | ./fixed-width-format.sh "5 5 5 5 5" "," "/dev/stdin"
# 1111122222333334444455555
# 1234512345123451234512345
# EOF
# 11111,22222,33333,44444,55555
# 12345,12345,12345,12345,12345
#
# Read: https://en.wikipedia.org/wiki/Flat-file_database#Fixed-width_formats
#
# Positional parameters (see the header above).
field_widths="${1}"
# '%b' expands backslash escapes such as `\t` in the separator without using
# the user-supplied text as the printf format, so a '%' is taken literally.
separator="$(printf '%b' "${2-\t}")"
input_file="${3:-/dev/stdin}"
#######################################
# Insert the separator after each fixed-width field of one line.
# Globals:   field_widths (read), separator (read)
# Arguments: $1 - the fixed-width line to convert
# Outputs:   the converted line on stdout, followed by a newline
# A separator is only inserted when characters remain after the field, so
# short lines get no trailing separators.
#######################################
split_into_fields() {
  local line="${1}"
  local offset=0
  local width
  # Unquoted on purpose: field_widths is a whitespace-separated list.
  for width in ${field_widths}; do
    offset=$(( offset + width ))
    if (( offset < ${#line} )); then
      line="${line:0:offset}${separator}${line:offset}"
      # Skip over the separator just inserted, whatever its length.
      offset=$(( offset + ${#separator} ))
    fi
  done
  # printf instead of echo: echo would swallow lines such as "-n" or "-e".
  printf '%s\n' "${line}"
}
# Convert INPUT_FILE line by line.
# `IFS=` keeps leading and trailing blanks, which are significant in
# fixed-width data; the `|| [[ -n ... ]]` clause also processes a last line
# that has no trailing newline.
while IFS= read -r line || [[ -n "${line}" ]]; do
  split_into_fields "${line}"
done < "${input_file}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment