Skip to content

Instantly share code, notes, and snippets.

@randerzander
Last active August 29, 2015 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save randerzander/0d2537b1970ba94648bb to your computer and use it in GitHub Desktop.
Save randerzander/0d2537b1970ba94648bb to your computer and use it in GitHub Desktop.
csv2ddl
set -eu
FILE=$1
HEADER_LINE_NUM=$2
DELIM=$3
TABLE_NAME=$4
LOCATION=$5
# This script assumes columns are all strings. Edit the DDL file after running the script and change column types at will.
HEADERS=`sed "${HEADER_LINE_NUM}q;d" $FILE | tr -d '\b\r'`
IFS=$DELIM read -a array <<< "$HEADERS"
LAST_COL=${array[$[${#array[@]}-1]]}
echo 'drop table if exists '$TABLE_NAME'_raw;' > $TABLE_NAME.ddl
echo 'create external table '$TABLE_NAME'_raw (' >> $TABLE_NAME.ddl
for element in "${array[@]}"
do
if [ $element != $LAST_COL ]; then
echo " $element string," >> $TABLE_NAME.ddl
else
echo " $element string" >> $TABLE_NAME.ddl
fi
done
echo ")" >> $TABLE_NAME.ddl
echo 'ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.OpenCSVSerde"' >> $TABLE_NAME.ddl
echo 'location "'$LOCATION'"' >> $TABLE_NAME.ddl
echo 'tblproperties ("skip.header.line.count"="'$HEADER_LINE_NUM'")' >> $TABLE_NAME.ddl
echo ';' >> $TABLE_NAME.ddl
echo 'drop table if exists '$TABLE_NAME'_orc;' >> $TABLE_NAME.ddl
echo 'create table '$TABLE_NAME'_orc stored as orc as' >> $TABLE_NAME.ddl
echo 'select' >> $TABLE_NAME.ddl
for element in "${array[@]}"
do
if [ $element != $LAST_COL ]; then
echo " trim($element) as $element," >> $TABLE_NAME.ddl
else
echo " trim($element) as $element" >> $TABLE_NAME.ddl
fi
done
echo 'from '$TABLE_NAME'_raw;' >> $TABLE_NAME.ddl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment