Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Clickhouse incremental refresh script using changelog data from https://gist.github.com/dotmanila/53a2c0b9b7a3be4f9e99b94aa203c66d
#!/bin/bash
_echo() {
echo "$(date +%Y-%m-%d_%H_%M_%S) incr-refresh $1"
}
TBL='hits'
# MySQL clickhouse_changelog command
MYCL="mysql -h source_db_host mydb -BNe"
# MySQL source table command
MYDB="mysql -h source_db_host mydb -BNe"
weeks=$($MYCL "SELECT created_at FROM clickhouse_changelog ORDER BY created_at ASC")
cweek=$($MYCL "SELECT DATE_ADD(CURDATE(), INTERVAL - WEEKDAY(CURDATE()) DAY)")
xcode=0
_echo "Starting changelog processing for mydb.hits"
_echo "Current week is: $cweek"
if [ -z "$weeks" ]; then
_echo "Weeks is empty, nothing to do"
else
for week in $weeks; do
_echo "Processing week: $week"
sql=$(cat <<EOF
SELECT
id, DATE_FORMAT(created_at, "%Y-%m-%d"),
type, user_id, location_id, UNIX_TIMESTAMP(created_at)
FROM $TBL
WHERE created_at BETWEEN '$week 00:00:00'
AND DATE_ADD('$week 23:59:59', INTERVAL 6 DAY)
EOF
)
$MYDB "$sql" > $week.txt
$MYCL"DELETE FROM clickhouse_changelog WHERE AND created_at = '$week'"
clickhouse-client -d mydb --query="ALTER TABLE $TBL DROP PARTITION '$week'"
cat $week.txt | clickhouse-client -d mydb --query="INSERT INTO $TBL FORMAT TabSeparated"
rm -f $week.txt
if [ "x$cweek" == "x$week" ]; then
xcode=1
fi
done
fi
_echo "Changelog import for $TBL complete"
if [ "x$xcode" == "x1" ]; then
exit 0
fi
minid=$(clickhouse-client -d mydb -q "SELECT MAX(id) FROM $TBL")
_echo "Inserting new records for $TBL > id: $minid"
$MYDB "SELECT $COLS FROM $TBL WHERE id > $minid" > hits.txt
ROWS=$(wc -l hits.txt|awk '{print $1}')
if [ "x$ROWS" != "x0" ]; then
cat hits.txt | clickhouse-client -d mydb --query="INSERT INTO $TBL FORMAT TabSeparated"
rm -f hits.txt
else
_echo "No new rows found"
fi
_echo "Incremental import for $TBL complete"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.