22/08/04 18:53:06 INFO HoodieLogFormatWriter: HoodieLogFile{pathStr='hdfs://hdfs-namenodes:8020/tmp/jenkins-infra-hudi/hudi/job-run/LongSpark2.4.7HudiTestsManualEKS_Siva/data/2022-08-04/1/MERGE_ON_READdeltastreamer-long-running-multi-partitions-metadata.yamltest-metadata-aggressive-clean-archival.properties/output/.hoodie/metadata/.hoodie/archived/.commits_.archive.1_1-0-1', fileLen=0} exists. Appending to existing file
22/08/04 18:53:06 INFO DFSClient: Exception in createBlockOutputStream
java.io.IOException: Got error, status message , ack with firstBadLink as 10.2.6.189:9866
at org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.checkBlockOpStatus(DataTransferProtoUtil.java:142)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.createBlockOutputStream(DFSOutputStream.java:1359)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1184)
22/08/04 15:32:26 INFO Utils: Supplied authorities: hiveserver:10000
22/08/04 15:32:26 INFO Utils: Resolved authority: hiveserver:10000
22/08/04 15:32:26 WARN HiveConnection: Failed to connect to hiveserver:10000
22/08/04 15:32:26 INFO DagScheduler: Forcing shutdown of executor service, this might kill running tasks
22/08/04 15:32:26 ERROR HoodieTestSuiteJob: Failed to run Test Suite
java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: org/apache/logging/log4j/core/appender/AbstractAppender
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:113)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:68)
22/08/04 13:52:20 INFO HiveSyncTool: Sync complete for pqm_cow_dhspyam_tacapro_2022_08_04_33_table
22/08/04 13:52:20 INFO metastore: Closed a connection to metastore, current connections: 0
22/08/04 13:52:20 INFO DagScheduler: Finished executing b20fc7ae-e6d9-4b25-90a8-28ba0a54c3f5
22/08/04 13:52:20 WARN DagScheduler: Executing node "first_hive_query" :: {"hive_props":["set hive.input.format = org.apache.hudi.hadoop.HoodieParquetInputFormat"],"name":"25f871d6-4bd3-4c21-85cf-ff5edaf02ca2","hive_queries":[{"select count(*) from testdb.table1":400},{"select count(*) from testdb.table1 group by `_row_key` having count(*) > 1":0}],"config":"first_hive_query"}
22/08/04 13:52:20 INFO DagNode: Executing hive query node 25f871d6-4bd3-4c21-85cf-ff5edaf02ca2
22/08/04 13:52:20 ERROR DagScheduler: Exception executing node
org.apache.hudi.exception.HoodieValidationException: Hive query validation failed due to No suitable driver found for random
at org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode.execute(HiveQueryN
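The "No suitable driver found for random" failure reads as though the test suite's Hive JDBC URL was left at a placeholder value ("random") rather than a real HiveServer2 endpoint; that is an inference from this log alone. For reference, a minimal Scala sketch of a working Hive JDBC query, assuming the hiveserver:10000 endpoint seen earlier and the Hive JDBC driver (org.apache.hive.jdbc.HiveDriver) on the classpath:

import java.sql.DriverManager

// DriverManager selects a driver by URL scheme, so a literal "random" URL
// yields "No suitable driver found". A well-formed HiveServer2 URL:
val conn = DriverManager.getConnection("jdbc:hive2://hiveserver:10000/default", "hive", "")
val stmt = conn.createStatement()
val rs   = stmt.executeQuery("select count(*) from testdb.table1")
while (rs.next()) println(rs.getLong(1))
conn.close()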
./bin/spark-sql --packages org.apache.hudi:hudi-spark-bundle_2.11:0.10.0,org.apache.spark:spark-avro_2.11:2.4.4 \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
--conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
--conf 'spark.kryoserializer.buffer.max=1024m' --conf spark.rdd.compress=true --driver-memory 6g
create table hudi_mor1 (
VendorID int,
tpep_pickup_datetime string,
scala> df.write.format("hudi").
| options(getQuickstartWriteConfigs).
| option(PRECOMBINE_FIELD_OPT_KEY, "ts").
| option(RECORDKEY_FIELD_OPT_KEY, "uuid").
| option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
| option(TABLE_NAME, tableName).
| mode(Append).
| save(basePath)
warning: there was one deprecation warning; re-run with -deprecation for details
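To confirm the append above landed, the quickstart-style follow-up is a snapshot read of the same table; a minimal sketch reusing spark, basePath, and the column names from the write snippet (on older Hudi builds with Spark 2.4 a partition glob such as basePath + "/*/*/*/*" may be needed instead of the bare path):

// Snapshot-read the table just written, using the same session and basePath.
val tripsDF = spark.read.format("hudi").load(basePath)
tripsDF.createOrReplaceTempView("hudi_trips_snapshot")
spark.sql("select _hoodie_commit_time, uuid, partitionpath from hudi_trips_snapshot").show(false)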
22/03/31 13:08:59 ERROR AbstractHoodieLogRecordReader: Got exception when reading log file
257508 [Spring Shell] WARN org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader - XXXX Hoodie rec: HoodieKey { recordKey=__all_partitions__ partitionPath=files} : {"_hoodie_commit_time": null, "_hoodie_commit_seqno": null, "_hoodie_record_key": null, "_hoodie_partition_path": null, "_hoodie_file_name": null, "key": "__all_partitions__", "type": 1, "filesystemMetadata": {".": {"size": 0, "isDeleted": false}}, "BloomFilterMetadata": null, "ColumnStatsMetadata": null}
257511 [Spring Shell] INFO org.apache.hudi.common.table.log.HoodieLogFormatReader - Moving to the next reader for logfile HoodieLogFile{pathStr='file:/Users/nsb/Documents/personal/temp/onehouse_tbl_debug_mar23_s3/.hoodie/metadata/files/.files-0000_20220323070911012.log.3_0-82-771', fileLen=-1}
257511 [Spring Shell] INFO org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader - Scanning log file HoodieLogFile{pathStr='file:/Users/nsb/Documents/personal/temp/onehouse_tbl_debug_mar23_s3/.hoodie/metadata/files/.files-0000_2022032
2022-03-14 04:09:48,953 INFO heartbeat.HeartbeatUtils: Deleted the heartbeat for instant 20220314040941672
2022-03-14 04:09:48,953 INFO heartbeat.HoodieHeartbeatClient: Deleted heartbeat file for instant 20220314040941672
2022-03-14 04:09:48,954 WARN clean.CleanActionExecutor: Failed to perform previous clean operation, instant: [==>20220314040941672__clean__REQUESTED]
org.apache.hudi.exception.HoodieException: Unable to instantiate class org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex
at org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:91)
at org.apache.hudi.common.bootstrap.index.BootstrapIndex.getBootstrapIndex(BootstrapIndex.java:163)
at org.apache.hudi.common.table.view.AbstractTableFileSystemView.init(AbstractTableFileSystemView.java:
diff --git a/website/docs/basic_configurations.md b/website/docs/basic_configurations.md
index 3eb8cc2ba..47fd991da 100644
--- a/website/docs/basic_configurations.md
+++ b/website/docs/basic_configurations.md
@@ -55,6 +55,20 @@ Options useful for writing tables via `write.format.option(...)`
---
+> #### hoodie.datasource.write.table.type
+> The table type for the underlying data, for this write. This can’t change between writes.<br></br>
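The option documented by this diff is supplied per write but must stay constant for a table's lifetime; a hedged Scala sketch of pinning it on the first write, reusing df, tableName, and basePath from the earlier snippet (MERGE_ON_READ here is illustrative; COPY_ON_WRITE is the default):

// hoodie.datasource.write.table.type cannot change between writes,
// so it is fixed on the table's very first write.
df.write.format("hudi").
  option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
  option("hoodie.datasource.write.recordkey.field", "uuid").
  option("hoodie.datasource.write.precombine.field", "ts").
  option("hoodie.table.name", tableName).
  mode("overwrite").
  save(basePath)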
22/03/04 16:15:39 ERROR DagScheduler: Exception executing node
org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at org.apache.hudi.client.SparkRDDWriteClient.completeClustering(SparkRDDWriteClient.java:394)
at org.apache.hudi.client.SparkRDDWriteClient.completeTableService(SparkRDDWriteClient.java:473)
at org.apache.hudi.client.SparkRDDWriteClient.cluster(SparkRDDWriteClient.java:360)
at org.apache.hudi.client.BaseHoodieWriteClient.lambda$inlineClustering$15(BaseHoodieWriteClient.java:1196)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:96)
at org.apache.hudi.client.BaseHoodieWriteClient.inlineClustering(BaseHoodieWriteClient.java:1194)
at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInline(BaseHoodieWriteClient.java:502)
at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:211)