-
-
Save jimhe/5ab94d4404df66680fa7626745363139 to your computer and use it in GitHub Desktop.
Copy a MySQL table to BigQuery. If you need to copy all tables, use the loop given at the end. Exits with error code 3 if blob or text columns are found. The CSV files are first copied to Google Cloud Storage before being imported into BigQuery.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# mysql_to_bigquery.sh - copy one MySQL table into Google BigQuery via
# Google Cloud Storage.
#
# Usage: mysql_to_bigquery.sh <mysql user> <mysql password> <mysql host> <schema> <table>
#
# Exit codes: 1 usage/missing tooling, 2 bq load failure,
#             3 blob/text column or upload failure, 4 bucket creation failure.

# Validate the argument count before anything reads the positionals.
if [ $# -ne 5 ]; then
    echo "Usage: mysql_to_bigquery.sh <mysql user> <mysql password> <mysql host> <schema> <table>" >&2
    exit 1
fi

MYSQL_USER=$1
MYSQL_PASSWORD=$2
MYSQL_HOST=$3
TABLE_SCHEMA=$4
TABLE_NAME=$5

# Bare 'gsutil' with no arguments prints usage and exits non-zero even when
# it is installed, so probing by running it always failed; check PATH instead.
command -v gsutil > /dev/null 2>&1 || {
    echo "gsutil not available! Please install google-cloud-sdk: brew cask install google-cloud-sdk" >&2
    exit 1
}
command -v bq > /dev/null 2>&1 || {
    echo "bq not available! Please install google-cloud-sdk: brew cask install google-cloud-sdk" >&2
    exit 1
}

file_prefix="${TABLE_SCHEMA}_${TABLE_NAME}"
bucket_name="${TABLE_SCHEMA}_mysql"
splitat="4000000000"   # max bytes per split chunk (~4 GB)
bulkfiles=200          # gzip files handed to a single bq load job
maxbad=300             # bad records tolerated per load job
# NOTE(review): a password on the command line is visible in 'ps' output;
# consider MYSQL_PWD or --defaults-extra-file instead.
mysqlargs="--user=$MYSQL_USER --password=$MYSQL_PASSWORD --host=$MYSQL_HOST"
# Make sure the table has no blob or text columns: they cannot be exported
# safely through a tab-separated dump. COUNT(*) (unlike the original
# SUM(IF(...))) returns 0 rather than NULL when nothing matches, and the
# query now also matches %text%, which the error message always claimed.
mycount=$(mysql $mysqlargs -Bs <<heredoc
select count(*) from INFORMATION_SCHEMA.columns
where TABLE_SCHEMA = '$TABLE_SCHEMA' AND TABLE_NAME = '$TABLE_NAME'
and (DATA_TYPE LIKE '%blob%' OR DATA_TYPE LIKE '%text%')
heredoc
)
if [ "$mycount" -ne 0 ]; then
    echo "blob or text column found in table $TABLE_NAME" >&2
    exit 3
fi
# Create the Google Cloud Storage bucket. A pre-existing bucket is fine;
# any other creation failure is fatal.
if ! gsutil mb "gs://$bucket_name" > /tmp/gsutil_mb 2>&1; then
    if ! grep 'already exists' /tmp/gsutil_mb; then
        echo "bucket $bucket_name could not be created in cloud"
        exit 4
    fi
fi
# Build a BigQuery JSON schema from the MySQL table structure.
# ORDER BY ORDINAL_POSITION is required: the data dump emits columns in
# table order, and information_schema gives no ordering guarantee without it.
cat > json_query.txt << heredoc
select CONCAT('{"name": "', COLUMN_NAME, '","type":"', IF(DATA_TYPE like "%int%", "INTEGER",IF(DATA_TYPE = "decimal","FLOAT","STRING")) , '"},') as json from information_schema.columns where TABLE_SCHEMA = '$TABLE_SCHEMA' AND TABLE_NAME = '$TABLE_NAME' order by ORDINAL_POSITION;
heredoc
echo '[' > $TABLE_NAME.json
mysql $mysqlargs -Bs < json_query.txt >> $TABLE_NAME.json
# The dump step appends the schema name as an extra trailing column on every
# row; declare it here too, otherwise bq load rejects every record. This
# final entry carries no trailing comma, so the per-column commas above are
# kept as-is (the original stripped the last one with sed instead).
echo '{"name": "table_schema","type":"STRING"}' >> $TABLE_NAME.json
echo ']' >> $TABLE_NAME.json
# Keep a copy of the original DDL alongside the schema.
mysql $mysqlargs $TABLE_SCHEMA -Bse"show create table $TABLE_NAME\G" > $TABLE_NAME.sql
# Copy the JSON schema to the cloud bucket.
gsutil cp $TABLE_NAME.json gs://$bucket_name/
# Dump the table data as tab-separated text.
mysql $mysqlargs $TABLE_SCHEMA -Bse"select * from $TABLE_NAME" > $TABLE_NAME.txt1
# Strip carriage returns embedded in the data. (The original also ran
# sed 's/(Ctrl-v)(Ctrl-m)//g' - a literal transcription of the Ctrl-V Ctrl-M
# keystroke that removed the literal string "(Ctrl-v)(Ctrl-m)", not \r;
# this tr already does the intended job, so that line is dropped.)
tr -d "\r" < $TABLE_NAME.txt1 > $TABLE_NAME.txt
# Tag every row with its source schema as an extra trailing column.
# (GNU sed; BSD/macOS sed would need -i '')
sed -i "s/$/\t$TABLE_SCHEMA/" $TABLE_NAME.txt
# Split the dump into chunks, then scrub, gzip and upload each one.
# -C (GNU split) caps chunk size in bytes WITHOUT cutting a line in half;
# the original -b split by raw bytes and could break a record across files.
split -C $splitat $TABLE_NAME.txt $file_prefix
for file in ${file_prefix}*
do
    # Skip the literal pattern when the glob matches nothing.
    [ -e "$file" ] || continue
    # BigQuery does not seem to like double quotes and literal NULL tokens;
    # one sed invocation instead of two passes over the file.
    sed -i -e 's/\"//g' -e 's/NULL//g' "$file"
    gzip "$file"
    # copy to google cloud
    if ! gsutil cp "$file.gz" gs://$bucket_name/; then
        echo "$file could not be copied to cloud" >&2
        exit 3
    fi
    rm -f "$file.gz"
done
# Import the uploaded files into BigQuery, $bulkfiles gzip files per job.
# Ensure the dataset exists ONCE (the original re-ran bq mk every iteration).
bq mk $bucket_name
jobnum=0
for mylist in $(gsutil ls gs://$bucket_name/${file_prefix}*.gz | xargs -n$bulkfiles | tr ' ' ',')
do
    jobnum=$((jobnum + 1))
    echo "$mylist"
    # Fixes vs original: unique per-batch job id (it reused a stale $file
    # left over from the upload loop, colliding across batches), and the
    # real flag name is --max_bad_records, not --max_bad_record.
    if ! bq load --nosync -F '\t' --job_id="${file_prefix}_${jobnum}" --max_bad_records=$maxbad $bucket_name.$TABLE_NAME $mylist $TABLE_NAME.json; then
        echo "bq load failed for $mylist" >&2
        exit 2
    fi
done
# Clean up every local scratch file produced along the way (the original
# left blob_query.txt, json_query.txt and the raw .txt1 dump behind).
rm -f blob_query.txt json_query.txt
rm -f $TABLE_NAME.json $TABLE_NAME.sql $TABLE_NAME.txt $TABLE_NAME.txt1
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment