DolphinScheduler 可以通过 Shell 节点校验数据是否符合要求。
# Partition-existence check, intended to run as a DolphinScheduler shell node.
# Exit status: 0 when the HDFS partition directory exists, 1 otherwise.
#
# NOTE(review): ${system.biz.date} is not valid shell parameter syntax; it is
# presumably replaced by DolphinScheduler before the script reaches the shell.
PT_DATE=${system.biz.date}
PT_PATH=/user/hive/warehouse/default.db/test/pt_d=${PT_DATE}
set -e
# Obtain a Kerberos ticket from the keytab; under `set -e` a failure aborts here.
kinit -kt /etc/krb5/geosmart.keytab geosmart
# Run the test as the `if` condition: with `set -e` active, a bare failing
# `hdfs dfs -test` would terminate the script before any message is printed.
if hdfs dfs -test -e "${PT_PATH}"; then
  # Double quotes so the path is expanded in the message.
  echo "partition ${PT_PATH} exist"
  exit 0
fi
echo "partition ${PT_PATH} not exist"
exit 1
# Partition row-count check, intended to run as a DolphinScheduler shell node.
# Exit status: 0 when the partition holds more than MIN_PT_COUNT rows,
# 1 otherwise (including when no numeric count could be read).
#
# NOTE(review): PT_DATE is not assigned in this script; it is expected to be
# provided beforehand (shell variable or scheduler-side substitution) — confirm.
set -e
TABLE='default.test'
MIN_PT_COUNT=5000
# Keytab/principal spelled to match the kinit call used by the companion
# partition-existence check (the previous spelling "geosamrt" looked like a typo).
# A spark-sql failure aborts the script here because of `set -e`.
pt_count=$(spark-sql \
  --keytab /etc/krb5/geosmart.keytab \
  --principal geosmart -S \
  -e "select count(1) from $TABLE where pt_d='${PT_DATE}'")
# Keep only the last output line (expected to carry the count) and strip
# surrounding whitespace so the numeric validation below is exact.
pt_count=$(printf '%s\n' "$pt_count" | tail -n 1 | tr -d '[:space:]')
# Reject empty or non-numeric output explicitly rather than letting the
# numeric comparison fail with a confusing "integer expression expected".
case "$pt_count" in
  '' | *[!0-9]*)
    echo "$TABLE of partition ${PT_DATE},unexpected count output=[$pt_count]"
    exit 1
    ;;
esac
if [ "$pt_count" -gt "$MIN_PT_COUNT" ]; then
  echo "$TABLE of partition ${PT_DATE},count=[$pt_count],greater than min[$MIN_PT_COUNT]"
  exit 0
fi
echo "$TABLE of partition ${PT_DATE},count=[$pt_count],lower than min[$MIN_PT_COUNT]"
exit 1