Clickhouse incremental refresh script using changelog data from https://gist.github.com/dotmanila/53a2c0b9b7a3be4f9e99b94aa203c66d
#!/bin/bash | |
_echo() { | |
echo "$(date +%Y-%m-%d_%H_%M_%S) incr-refresh $1" | |
} | |
TBL='hits' | |
# MySQL clickhouse_changelog command | |
MYCL="mysql -h source_db_host mydb -BNe" | |
# MySQL source table command | |
MYDB="mysql -h source_db_host mydb -BNe" | |
weeks=$($MYCL "SELECT created_at FROM clickhouse_changelog ORDER BY created_at ASC") | |
cweek=$($MYCL "SELECT DATE_ADD(CURDATE(), INTERVAL - WEEKDAY(CURDATE()) DAY)") | |
xcode=0 | |
_echo "Starting changelog processing for mydb.hits" | |
_echo "Current week is: $cweek" | |
if [ -z "$weeks" ]; then | |
_echo "Weeks is empty, nothing to do" | |
else | |
for week in $weeks; do | |
_echo "Processing week: $week" | |
sql=$(cat <<EOF | |
SELECT | |
id, DATE_FORMAT(created_at, "%Y-%m-%d"), | |
type, user_id, location_id, UNIX_TIMESTAMP(created_at) | |
FROM $TBL | |
WHERE created_at BETWEEN '$week 00:00:00' | |
AND DATE_ADD('$week 23:59:59', INTERVAL 6 DAY) | |
EOF | |
) | |
$MYDB "$sql" > $week.txt | |
$MYCL"DELETE FROM clickhouse_changelog WHERE AND created_at = '$week'" | |
clickhouse-client -d mydb --query="ALTER TABLE $TBL DROP PARTITION '$week'" | |
cat $week.txt | clickhouse-client -d mydb --query="INSERT INTO $TBL FORMAT TabSeparated" | |
rm -f $week.txt | |
if [ "x$cweek" == "x$week" ]; then | |
xcode=1 | |
fi | |
done | |
fi | |
_echo "Changelog import for $TBL complete" | |
if [ "x$xcode" == "x1" ]; then | |
exit 0 | |
fi | |
minid=$(clickhouse-client -d mydb -q "SELECT MAX(id) FROM $TBL") | |
_echo "Inserting new records for $TBL > id: $minid" | |
$MYDB "SELECT $COLS FROM $TBL WHERE id > $minid" > hits.txt | |
ROWS=$(wc -l hits.txt|awk '{print $1}') | |
if [ "x$ROWS" != "x0" ]; then | |
cat hits.txt | clickhouse-client -d mydb --query="INSERT INTO $TBL FORMAT TabSeparated" | |
rm -f hits.txt | |
else | |
_echo "No new rows found" | |
fi | |
_echo "Incremental import for $TBL complete" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment