Last active
August 29, 2015 14:19
-
-
Save randerzander/0d2537b1970ba94648bb to your computer and use it in GitHub Desktop.
csv2ddl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set -eu | |
FILE=$1 | |
HEADER_LINE_NUM=$2 | |
DELIM=$3 | |
TABLE_NAME=$4 | |
LOCATION=$5 | |
# This script assumes columns are all strings. Edit the DDL file after running the script and change column types at will. | |
HEADERS=`sed "${HEADER_LINE_NUM}q;d" $FILE | tr -d '\b\r'` | |
IFS=$DELIM read -a array <<< "$HEADERS" | |
LAST_COL=${array[$[${#array[@]}-1]]} | |
echo 'drop table if exists '$TABLE_NAME'_raw;' > $TABLE_NAME.ddl | |
echo 'create external table '$TABLE_NAME'_raw (' >> $TABLE_NAME.ddl | |
for element in "${array[@]}" | |
do | |
if [ $element != $LAST_COL ]; then | |
echo " $element string," >> $TABLE_NAME.ddl | |
else | |
echo " $element string" >> $TABLE_NAME.ddl | |
fi | |
done | |
echo ")" >> $TABLE_NAME.ddl | |
echo 'ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.OpenCSVSerde"' >> $TABLE_NAME.ddl | |
echo 'location "'$LOCATION'"' >> $TABLE_NAME.ddl | |
echo 'tblproperties ("skip.header.line.count"="'$HEADER_LINE_NUM'")' >> $TABLE_NAME.ddl | |
echo ';' >> $TABLE_NAME.ddl | |
echo 'drop table if exists '$TABLE_NAME'_orc;' >> $TABLE_NAME.ddl | |
echo 'create table '$TABLE_NAME'_orc stored as orc as' >> $TABLE_NAME.ddl | |
echo 'select' >> $TABLE_NAME.ddl | |
for element in "${array[@]}" | |
do | |
if [ $element != $LAST_COL ]; then | |
echo " trim($element) as $element," >> $TABLE_NAME.ddl | |
else | |
echo " trim($element) as $element" >> $TABLE_NAME.ddl | |
fi | |
done | |
echo 'from '$TABLE_NAME'_raw;' >> $TABLE_NAME.ddl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment