Created
December 24, 2017 00:14
-
-
Save dotmanila/5ceb005fe2fdd6bcedce8ee7d10015c1 to your computer and use it in GitHub Desktop.
Clickhouse incremental refresh script using changelog data from https://gist.github.com/dotmanila/53a2c0b9b7a3be4f9e99b94aa203c66d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
_echo() { | |
echo "$(date +%Y-%m-%d_%H_%M_%S) incr-refresh $1" | |
} | |
TBL='hits' | |
# MySQL clickhouse_changelog command | |
MYCL="mysql -h source_db_host mydb -BNe" | |
# MySQL source table command | |
MYDB="mysql -h source_db_host mydb -BNe" | |
weeks=$($MYCL "SELECT created_at FROM clickhouse_changelog ORDER BY created_at ASC") | |
cweek=$($MYCL "SELECT DATE_ADD(CURDATE(), INTERVAL - WEEKDAY(CURDATE()) DAY)") | |
xcode=0 | |
_echo "Starting changelog processing for mydb.hits" | |
_echo "Current week is: $cweek" | |
if [ -z "$weeks" ]; then | |
_echo "Weeks is empty, nothing to do" | |
else | |
for week in $weeks; do | |
_echo "Processing week: $week" | |
sql=$(cat <<EOF | |
SELECT | |
id, DATE_FORMAT(created_at, "%Y-%m-%d"), | |
type, user_id, location_id, UNIX_TIMESTAMP(created_at) | |
FROM $TBL | |
WHERE created_at BETWEEN '$week 00:00:00' | |
AND DATE_ADD('$week 23:59:59', INTERVAL 6 DAY) | |
EOF | |
) | |
$MYDB "$sql" > $week.txt | |
$MYCL"DELETE FROM clickhouse_changelog WHERE AND created_at = '$week'" | |
clickhouse-client -d mydb --query="ALTER TABLE $TBL DROP PARTITION '$week'" | |
cat $week.txt | clickhouse-client -d mydb --query="INSERT INTO $TBL FORMAT TabSeparated" | |
rm -f $week.txt | |
if [ "x$cweek" == "x$week" ]; then | |
xcode=1 | |
fi | |
done | |
fi | |
_echo "Changelog import for $TBL complete" | |
if [ "x$xcode" == "x1" ]; then | |
exit 0 | |
fi | |
minid=$(clickhouse-client -d mydb -q "SELECT MAX(id) FROM $TBL") | |
_echo "Inserting new records for $TBL > id: $minid" | |
$MYDB "SELECT $COLS FROM $TBL WHERE id > $minid" > hits.txt | |
ROWS=$(wc -l hits.txt|awk '{print $1}') | |
if [ "x$ROWS" != "x0" ]; then | |
cat hits.txt | clickhouse-client -d mydb --query="INSERT INTO $TBL FORMAT TabSeparated" | |
rm -f hits.txt | |
else | |
_echo "No new rows found" | |
fi | |
_echo "Incremental import for $TBL complete" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment