Last active
November 7, 2018 15:30
-
-
Save curtmack/45c612f6c310384b350bc201becb5ba7 to your computer and use it in GitHub Desktop.
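Parse CSV files with awk. Uses the Text::CSV CPAN module (via a small Perl filter) to convert the CSV into records whose fields are separated by the C0 Unit Separator character, which awk then uses as its field separator.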
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Run awk over CSV data read from standard input.
#
# The input is first piped through csvusep.pl (looked up in the same
# directory as this script), and awk is then invoked with the C0 Unit
# Separator control code (octal 037, hex 1f) as its field separator.
# All arguments given to this script are passed to awk unchanged.
#
# The Unit Separator is vanishingly unlikely to be used in data, so it
# will work for our purposes.
#
# printf is used instead of echo to produce the separator: whether echo
# honours -n or backslash escapes differs between shells, whereas printf
# interprets octal escapes in its format string consistently.
USEP=$(printf '\037')

# Quote $0 and $DIR so the script still works when it is installed under a
# path containing spaces or glob characters.
DIR=$(dirname "$0")

perl "$DIR/csvusep.pl" | awk "-F$USEP" "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Read CSV from standard input and write each record to standard output as a
# single line whose fields are joined by the C0 Unit Separator control code
# (\x1f). Leading and trailing whitespace is stripped from every field.
#
# Exits with a non-zero status and a diagnostic on STDERR if the input
# cannot be parsed as CSV.
#
# NOTE(review): fields are printed verbatim, so a quoted field containing an
# embedded newline will still span several output lines.
use strict;
use warnings;
use utf8;
use Text::CSV;

binmode(STDOUT, ":utf8");

my $csv = Text::CSV->new({binary => 1})
    or die "Cannot create Text::CSV parser: " . Text::CSV->error_diag() . "\n";

# A glob reference is passed because a bareword STDIN argument is rejected
# under "use strict".
while (my $row = $csv->getline(\*STDIN)) {
    my @fixrow;
    foreach my $col (@{$row}) {
        $col =~ s/^\s+//;
        $col =~ s/\s+$//;
        push @fixrow, $col;
    }
    # C0 Unit Separator control code.
    # Vanishingly unlikely to be used in data, so it will work for our purposes.
    print(join("\x1f", @fixrow) . "\n");
}

# getline returns false both at end of input and on a parse error. Without
# this check malformed CSV would silently truncate the output and the
# script would still exit successfully.
unless ($csv->eof) {
    die "CSV parse error: " . $csv->error_diag() . "\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment