Skip to content

Instantly share code, notes, and snippets.

@lxneng
Forked from shantanuo/schema_convert.sh
Created April 3, 2018 02:56
Show Gist options
  • Save lxneng/914585bbc7704d39b4d13c8b58c93387 to your computer and use it in GitHub Desktop.
Save lxneng/914585bbc7704d39b4d13c8b58c93387 to your computer and use it in GitHub Desktop.
script to convert mysql schema to be compatible with data warehouse software
#!/bin/sh
# script to convert mysql schema to be compatible with data warehouse software
# make sure that s3cmd and maatkit utility is installed
db_name=${1:-'test'}
> /root/$db_name.txt
temppath='/mnt/data/pdump1'
host='localhost'
user='maatkit'
password='maatkit123'
bucket=`date '+%b%d'`
s3path="s3://$bucket$db_name"
access='ABC'
secret='PQR+XYZ'
options="emptyasnull blanksasnull maxerror 5000 ignoreblanklines escape delimiter '\t' gzip "
adminmail='you.name@gmail.com'
# reset dump path
rm -rf $temppath
mkdir $temppath
#mkdir $temppath/$db_name
chmod 777 $temppath
# use maatkit to dump tab separated data
time mk-parallel-dump -h"$host" -u"$user" -p"$password" --base-dir "$temppath" --databases $db_name --tab
# if maatkit is not installed, use built-in tab parameter
#extra="--tab=$temppath/$db_name -f --no-create-info"
#time mysqldump $db_name $extra
# generate drop table statements
for tbl_name in ` mysql $db_name -Bse"show tables" `
do
echo "drop table $tbl_name ;" >> /root/$db_name.txt
done
# create table statements modified using sed
mysqldump $db_name --skip-triggers --no-data --compact --compatible=ansi,no_table_options,no_key_options,no_field_options --force |
grep -v ' KEY "' |
grep -v ' UNIQUE KEY "' |
grep -v ' PRIMARY KEY ' |
grep -v ' CONSTRAINT "' |
sed '/^SET/d' |
sed 's/ unsigned / /g' |
sed 's/ zerofill / /g' |
sed 's/ auto_increment/ primary key autoincrement/g' |
sed 's/ tinyint([0-9]*) / smallint /g' |
sed 's/ smallint([0-9]*) / smallint /g' |
sed 's/ mediumint([0-9]*) / integer /g' |
sed 's/ int([0-9]*) / integer /g' |
sed 's/ bigint([0-9]*) / bigint /g' |
sed 's/ double / float /g' |
sed 's/ double(.*) / float /g' |
sed 's/ double, / float, /g' |
sed 's/ double(.*), / float, /g' |
sed 's/ float(.*)/ float /g' |
sed 's/ decimal(.*)/ decimal(29,2) /g' |
sed 's/ time / varchar(255) /g' |
sed 's/ time,/ varchar(255), /g' |
sed 's/ tinytext/ varchar(255) /g' |
sed 's/ text / varchar(max) /g' |
sed 's/ text,/ varchar(max), /g' |
sed 's/ mediumtext/ varchar(max) /g' |
sed 's/ longtext/ varchar(max) /g' |
sed 's/ tinyblob/ varchar(max) /g' |
sed 's/ blob/ varchar(max) /g' |
sed 's/ mediumblob/ varchar(max) /g' |
sed 's/ longblob/ varchar(max) /g' |
sed 's/ set(.*)/ varchar(max) /g' |
sed 's/ CHARACTER SET \w*/ /g' |
sed 's/ enum([^)]*) / varchar(255) /g' |
sed "s/ COMMENT.*'[^']*'/ /" |
sed "s/date DEFAULT '0000-00-00'/ date /g" |
sed "s/date NOT NULL DEFAULT '0000-00-00'/ date /g" |
sed "s/timestamp NOT NULL DEFAULT '0000-00-00 00:00:00'/ timestamp /g" |
sed "s/timestamp DEFAULT '0000-00-00 00:00:00'/ timestamp /g" |
sed "s/datetime NOT NULL DEFAULT '0000-00-00 00:00:00'/ datetime /g" |
sed "s/datetime DEFAULT '0000-00-00 00:00:00'/ datetime /g" |
sed 's/ on update [^,]*//g' |
sed 's/a/aA/g;s/__/aB/g;s/#/aC/g' | sed 's/aC/#/g;s/aB/__/g;s/aA/a/g' |
sed 's/ DEFAULT .*,/,/' |
sed 's/ DEFAULT .*//' |
sed 's/NOT NULL AUTO_INCREMENT,/,/' |
sed 's/NOT NULL//' |
sed 's/ char(.*)/ varchar(max)/' |
sed '/\/\*/d' |
sed -r ':a; s%(.*)/\*.*\*/%\1%; ta; /\/\*/ !b; N; ba' |
perl -e 'local $/;$_=<>;s/,\n\)/\n\)/gs;print "\n";print;print "\n"' |
perl -pe '
if (/^(INSERT.+?)\(/) {
$a=$1;
s/\\'\''/'\'\''/g;
s/\\n/\n/g;
s/\),\(/\);\n$a\(/g;
}
' >> /root/$db_name.txt 2> /root/$db_name.err
# generate copy statements
for tbl_name in ` mysql $db_name -Bse"show tables" `
do
echo "copy $tbl_name from '$s3path/$tbl_name.' credentials 'aws_access_key_id=$access;aws_secret_access_key=$secret' $options;" >> /root/$db_name.txt
done
# compress files
time gzip $temppath/$db_name/*.txt
# copy to amazon s3
s3cmd mb $s3path
s3cmd sync /$temppath/$db_name/ $s3path
# send the create table statements file by email
echo "create table statements compatible with redshift for db $db_name attached. " | mutt -s "redshift commands for $db_name" -a /root/$db_name.txt -- $adminmail
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment