Skip to content

Instantly share code, notes, and snippets.

@nihilismus
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nihilismus/48b120014982bebf784f to your computer and use it in GitHub Desktop.
Save nihilismus/48b120014982bebf784f to your computer and use it in GitHub Desktop.
#!/bin/sh
# Fixes CSV files from DENUE (INEGI) in which some records (tuples) are
# split across two lines.
#
# Copyright © 2015 Antonio Hernández Blas <hba.nihilismus@gmail.com>
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://www.wtfpl.net/ for more details.
# Usage: sh <this script> DENUE_INEGI_x.csv
#
# Reads the CSV file given as $1 and writes a repaired copy named
# FIXED_<name> next to it. A record is considered complete when its line
# ends with a double quote; any other line is written without a newline so
# that the following line is appended to it. DOS line endings (CRLF) are
# converted to LF on the way.
#
# Exit status: 0 on success, 1 when the input file is missing or the output
# file cannot be created.

me=${0##*/}
input_file=${1-}

# Keep the output beside the input even when a directory is part of the
# argument ("dir/x.csv" -> "dir/FIXED_x.csv", not "FIXED_dir/x.csv").
case ${input_file} in
  */*) output_file="${input_file%/*}/FIXED_${input_file##*/}" ;;
  *)   output_file="FIXED_${input_file}" ;;
esac

if [ ! -f "${input_file}" ]; then
  {
    printf '\n'
    printf '%s\n' "Error, you must indicate which CSV file to proccess"
    printf '%s\n' "Example:"
    printf ' [%s@%s %s]$ sh %s DENUE_INEGI_x.csv\n' \
      "$(whoami)" "$(hostname)" "$(basename -- "$(pwd)")" "${me}"
    printf '\n'
  } >&2
  exit 1
fi

# Create/truncate the output up front so an unwritable destination is
# reported before any processing starts.
if ! true > "${output_file}"; then
  printf '%s: cannot write to %s\n' "${me}" "${output_file}" >&2
  exit 1
fi

printf '\n'
printf '%s\n' "> Input file: ${input_file}"

# Arithmetic expansion discards the padding some wc implementations emit.
total_lines=$(($(wc -l < "${input_file}")))
current_line=1
current_error=1
cr=$(printf '\r')

printf '%s\n' "> Number of lines to process: ${total_lines} ..."
printf '\n'

# 'IFS= read -r' keeps backslashes and surrounding blanks intact; the
# '|| [ -n ... ]' clause also handles a final line without a newline.
# The output file is opened once on descriptor 3 for the whole loop.
while IFS= read -r line || [ -n "${line}" ]; do
  # Drop the CR of a DOS line ending (replaces the external 'fromdos').
  line=${line%"${cr}"}

  # Ignore trailing blanks when testing for the closing quote only; the
  # data written to the output keeps them.
  trimmed=${line%"${line##*[![:space:]]}"}

  case ${trimmed} in
    *\")
      # Complete record: refresh the progress counter in place.
      printf '\rCurrent line: %s' "${current_line}"
      printf '%s\n' "${line}" >&3
      ;;
    *)
      # Broken record: no newline, so the next line is joined to this one.
      printf '\n'
      printf '%s\n' " Error #${current_error} in line: ${current_line}"
      printf '%s' "${line}" >&3
      current_error=$((current_error + 1))
      ;;
  esac
  current_line=$((current_line + 1))
done < "${input_file}" 3> "${output_file}"

printf '\n'
printf '\n'
printf '%s\n' "> Done"
printf '%s\n' "> Output file: ${output_file}"
printf '\n'
#EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment