Skip to content

Instantly share code, notes, and snippets.

@silviud
Forked from shantanuo/mysql_to_redshift.sh
Created March 12, 2014 13:17
Show Gist options
  • Save silviud/9506749 to your computer and use it in GitHub Desktop.
Save silviud/9506749 to your computer and use it in GitHub Desktop.
#!/bin/sh
# download this script
# make sure s3cmd is installed
# yum install s3cmd
# if yum mentioned above does not work then
# wget http://downloads.sourceforge.net/project/s3tools/s3cmd/1.5.0-alpha1/s3cmd-1.5.0-alpha1.tar.gz
# tar xvf s3cmd-1.5.0-alpha1.tar.gz
# cd s3cmd-1.5.0-alpha1
# python setup.py install
# python 2.6+ is required
# make sure postgresql is supported by PHP
# php -r"phpinfo();" | grep pgsql
mydb=${1:-'test'}
mytable=${2:-'email_credit_summary'}
s3folder=`date '+%b%d'`
mydatadir=`mysqladmin variables | grep datadir | awk '{print $4}'`
access_key='XYZ'
secret_key='ABC'
redshift_host='viva-mar5-deliveryreport.ccf0ocqhkalc.us-east-1.redshift.amazonaws.com'
redshift_port='5439'
redshift_dbname='mydb'
redshift_user='root'
redshift_password='PASSWD'
redshift_param="delimiter '\t' emptyasnull blanksasnull maxerror 5000 ignoreblanklines gzip"
whereclause="1=1 limit 1000"
myoptions="--compact --compatible=mssql,no_key_options,no_table_options,no_field_options --where=\"$whereclause\""
rm -f $mydatadir/$mytable.txt
rm -f $mydatadir/$mytable.txt.gz
echo "time mysqldump $myoptions $mydb $mytable --tab=$mydatadir" > mydump.txt
cat mydump.txt | sh
gzip $mydatadir/$mytable.txt
s3cmd mb s3://$s3folder
s3cmd put $mydatadir/$mytable.txt.gz s3://$s3folder
s3cmd put $mydatadir/$mytable.sql s3://$s3folder
cat /var/lib/mysql/$mytable.sql | sed 's/(11)//' | sed 's/ int/ integer/' > $mytable.redshift.sql
cat $mytable.redshift.sql |
grep -v ' KEY "' |
grep -v ' UNIQUE KEY "' |
grep -v ' PRIMARY KEY ' |
sed '/^SET/d' |
sed 's/ unsigned / /g' |
sed 's/ auto_increment/ primary key autoincrement/g' |
sed 's/ tinyint([0-9]*) / smallint /g' |
sed 's/ smallint([0-9]*) / smallint /g' |
sed 's/ mediumint([0-9]*) / integer /g' |
sed 's/ int([0-9]*) / integer /g' |
sed 's/ integer([0-9]*) / integer /g' |
sed 's/ bigint([0-9]*) / bigint /g' |
sed 's/ double / float /g' |
sed 's/ tinyblob / text /g' |
sed 's/ blob / text /g' |
sed 's/ mediumblob / text /g' |
sed 's/ longblob / text /g' |
sed 's/ tinytext / text /g' |
sed 's/ mediumtext / text /g' |
sed 's/ longtext / text /g' |
sed 's/ tinyblob/ text /g' |
sed 's/ blob/ text /g' |
sed 's/ mediumblob/ text /g' |
sed 's/ longblob/ text /g' |
sed 's/ tinytext/ text /g' |
sed 's/ mediumtext/ text /g' |
sed 's/ longtext/ text /g' |
sed 's/ CHARACTER SET \w*/ /g' |
sed 's/ enum([^)]*) / varchar(255) /g' |
sed "s/ COMMENT.*'[^']*'/ /" |
sed "s/date DEFAULT '0000-00-00'/ date /g" |
sed "s/date NOT NULL DEFAULT '0000-00-00'/ date /g" |
sed "s/timestamp NOT NULL DEFAULT '0000-00-00 00:00:00'/ timestamp /g" |
sed "s/timestamp DEFAULT '0000-00-00 00:00:00'/ timestamp /g" |
sed "s/datetime NOT NULL DEFAULT '0000-00-00 00:00:00'/ datetime /g" |
sed "s/datetime DEFAULT '0000-00-00 00:00:00'/ datetime /g" |
sed 's/ on update [^,]*//g' |
sed 's/a/aA/g;s/__/aB/g;s/#/aC/g' | gcc -P -E $arg - | sed 's/aC/#/g;s/aB/__/g;s/aA/a/g' |
perl -e 'local $/;$_=<>;s/,\n\)/\n\)/gs;print "\n";print;print "\n"' |
perl -pe '
if (/^(INSERT.+?)\(/) {
$a=$1;
s/\\'\''/'\'\''/g;
s/\\n/\n/g;
s/\),\(/\);\n$a\(/g;
}
' > $mytable.redshift.me.sql.txt 2> $mytable.redshift.me.err
echo "copy $mytable from 's3://$s3folder/$mytable.txt' CREDENTIALS 'aws_access_key_id=$access_key;aws_secret_access_key=$secret_key' $redshift_param " >> $mytable.redshift.me.sql.txt
cat $mytable.redshift.me.sql.txt | tr '\n' ' ' > $mytable.redshift.me1.sql.txt
cat > myredshift.php << heredoc
<?php
\$conn=pg_connect("host=$redshift_host port=$redshift_port dbname=$redshift_dbname user=$redshift_user password=$redshift_password");
\$str=file_get_contents("$mytable.redshift.me1.sql.txt");
pg_query(\$conn, \$str);
?>
heredoc
php myredshift.php
exit
1) Download the script
wget --no-check-certificate https://gist.github.com/shantanuo/5011482/raw/fb7fce38c2a4d296c67db715e02167caee8c79bf/mysql_to_redshift.sh
2) change the passwords
sed -i 's_XYZ_accesskey_' mysql_to_redshift.sh
sed -i 's_ABC_secretkey_' mysql_to_redshift.sh
sed -i 's_PASSWD_Root1234_' mysql_to_redshift.sh
// remove the limit of 1000 rows
sed -i 's/limit 1000//' mysql_to_redshift.sh
3) run the script with DB_name TBL_name
time sh -xv mysql_to_redshift.sh test email_lists
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment